There's no great batteries-included way to do that (that I'm aware of). You could try to do it with the builtin containers, maybe something like:
data = [(some, headers, go, here),
(data, from, first, row),
(data, from, second, row),
... ,
(data, from, last, row)]
and then build a custom key to sort it, like:
import operator
# Keep the header row first and sort only the data rows. data[0] is a single
# tuple, so it is wrapped in a list before being concatenated with the list
# returned by sorted(); a bare tuple + list raises TypeError.
data = [data[0]] + sorted(data[1:], key=operator.itemgetter(col_num_to_sort_by))
But that's kind of ugly. It may be more maintainable to create a custom class that handles all that itself.
import operator
class Table(list):
    """A list of data rows plus a header row, sortable by column and printable.

    The table itself is the list of rows; the header row is kept separately in
    ``headers`` so that it never takes part in sorting.

    Attributes:
        headers: the sequence of column titles passed to the constructor.
        maxcellwidth: length of the longest ``str()`` rendering among the
            headers and the cells present at construction time.
    """

    def __init__(self, headers, rows_of_data):
        """Store ``headers`` and fill the table with ``rows_of_data``.

        Args:
            headers: sequence of column titles.
            rows_of_data: iterable of rows, each row a sequence of cells.
        """
        super().__init__(rows_of_data)
        self.headers = headers
        self.maxcellwidth = self._widest_cell()

    def _widest_cell(self):
        """Return the longest ``str()`` length among the headers and all cells."""
        widths = [len(str(cell)) for cell in self.headers]
        widths.extend(len(str(cell)) for row in self for cell in row)
        return max(widths, default=0)

    @staticmethod
    def _format_cell(cell, width):
        """Pad ``cell`` to ``width`` characters.

        The cell's own formatting is tried first, so numbers stay
        right-aligned and strings left-aligned. Objects that reject a width
        specifier (``None``, tuples, lists, ...) raise TypeError and are
        rendered through ``str()`` instead.
        """
        try:
            return "{0:{1}}".format(cell, width)
        except TypeError:
            return "{0:{1}}".format(str(cell), width)

    def sort_by_column(self, sort_by):
        """Sort by column and return a new Table; this table is left unchanged.

        Args:
            sort_by: index of the cell within each row to sort on.
        """
        return Table(self.headers, sorted(self, key=operator.itemgetter(sort_by)))

    def sort_by_columnip(self, sort_by):
        """Sort by column in-place.

        Args:
            sort_by: index of the cell within each row to sort on.
        """
        self.sort(key=operator.itemgetter(sort_by))

    def __str__(self):
        """Render the headers, a dashed rule, and one ``|``-separated line per row."""
        # Rows may have been appended since construction, so never use a
        # width narrower than what the current contents need.
        width = max(self.maxcellwidth, self._widest_cell())
        header = "|".join(self._format_cell(title, width) for title in self.headers)
        lines = [header, "-" * len(header)]
        lines.extend(
            "|".join(self._format_cell(cell, width) for cell in row)
            for row in self
        )
        return "\n".join(lines)
# TEST
>>> headers = ("one","two","three","four")
>>> data = [('1','2','3','5'), ('5','6','7','4')]
>>> table = Table(headers, data)
>>> print(table)
one  |two  |three|four 
-----------------------
1    |2    |3    |5    
5    |6    |7    |4    
>>> table.sort_by_columnip(3)
>>> print(table)
one  |two  |three|four 
-----------------------
5    |6    |7    |4    
1    |2    |3    |5    
Answer from Adam Smith on Stack Overflow
pandas - How to sort Table in Python - Stack Overflow
sorting - In Python, how can I store table and sort it based on one column? - Stack Overflow
python - How to sort pandas dataframe by one column - Stack Overflow
sorting - How to sort a table of data in python? - Stack Overflow
Videos
Use sort_values to sort the df by a specific column's values:
In [18]:
df.sort_values('2')
Out[18]:
0 1 2
4 85.6 January 1.0
3 95.5 February 2.0
7 104.8 March 3.0
0 354.7 April 4.0
8 283.5 May 5.0
6 238.7 June 6.0
5 152.0 July 7.0
1 55.4 August 8.0
11 212.7 September 9.0
10 249.6 October 10.0
9 278.8 November 11.0
2 176.5 December 12.0
If you want to sort by two columns, pass a list of column labels to sort_values with the column labels ordered according to sort priority. If you use df.sort_values(['2', '0']), the result would be sorted by column 2 then column 0. Granted, this does not really make sense for this example because each value in df['2'] is unique.
I tried the solutions above but did not get the expected results, so I found a different solution that works for me. Passing ascending=False sorts the dataframe in descending order; the default is True. I am using Python 3.6.6 and pandas 0.23.4.
final_df = df.sort_values(by=['2'], ascending=False)
You can see more details in pandas documentation here.
This should do what you are looking for:
In [1]: df = pd.DataFrame.from_dict([{'Country': 'A', 'Year':2012, 'Value': 20, 'Volume': 1}, {'Country': 'B', 'Year':2012, 'Value': 100, 'Volume': 2}, {'Country': 'C', 'Year':2013, 'Value': 40, 'Volume': 4}])
In [2]: df_pivot = pd.pivot_table(df, index=['Country'], columns = ['Year'],values=['Value'], fill_value=0)
In [3]: df_pivot
Out [4]:
Value
Year 2012 2013
Country
A 20 0
B 100 0
C 0 40
In [5]: df = df_pivot.reindex(df_pivot['Value'].sort_values(by=2012, ascending=False).index)
In [6]: df
Out [6]:
Value
Year 2012 2013
Country
B 100 0
A 20 0
C 0 40
Basically, it gets the index of the sorted values and reindexes the initial pivot table with it.
You can sort on more than one column in the pivot table. In my case, I have the probability of an accident at the postcode and the probability of an accident at the address, which I sort in descending order and display in a heatmap.
# Mean of both probability columns per postcode.
pivot = df.pivot_table(
    index=['postcode'],
    values=['probability_at_address', 'probability_at_postcode'],
    aggfunc='mean',
)
# Order rows from highest to lowest: address-level probability first, with
# postcode-level probability as the tie-breaker.
pivot = pivot.sort_values(
    by=['probability_at_address', 'probability_at_postcode'],
    ascending=False,
)

# Draw the sorted pivot table as a heatmap on a tall figure.
fig, ax = plt.subplots(figsize=(10, 20))
sns.heatmap(pivot, cmap="Blues", ax=ax)
plt.show()