import pandas as pd
import plotly.graph_objects as gr

# Exchange-student flows between university departments.
# Row i means: Sending_Dept[i] sent FlowValue[i] students to Accepting_Dept[i].
data = {
    'Sending_Dept': ['5A', '4A', '5B', '5A', '4B', '4A', '3A', '3B', '3A', '3B', '3A', '3B'],
    'Accepting_Dept': ['4B', '5B', '5A', '5B', '4A', '4B', '5B', '5A', '4B', '4A', '3B', '3A'],
    'FlowValue': [1, 3, 4, 3, 4, 4, 1, 1, 3, 2, 5, 3],
}
df = pd.DataFrame(data)

# Every department that appears on either side of a flow becomes one node.
unique_departments = set(df['Sending_Dept']).union(set(df['Accepting_Dept']))
# Sort the labels: iterating a set of strings has no stable order between
# interpreter runs, which would make the node indices (and the printed
# output below) change from run to run.
Depts = sorted(unique_departments)
Dept_indices = {dept: i for i, dept in enumerate(Depts)}

# The Sankey links are built from positions in the `Depts` label list,
# so translate each department name to its index.
sending_indices = [Dept_indices[dept] for dept in df['Sending_Dept']]
print(f"Sending indices are: {sending_indices}")
accepting_indices = [Dept_indices[dept] for dept in df['Accepting_Dept']]
flowvalues = df['FlowValue'].tolist()

# Sankey diagram
fig = gr.Figure(data=[gr.Sankey(
    node=dict(
        pad=10,
        thickness=25,
        line=dict(color="red", width=0.8),
        label=Depts,
    ),
    link=dict(
        source=sending_indices,
        target=accepting_indices,
        value=flowvalues,
    ),
)])
fig.update_layout(
    title_text="Sankey Diagram of exchange students flow between University Depts",
    font_size=12,
)
fig.show()
在生成的桑基图(图 1)中,节点 3A 旁的橙色矩形是光标悬停在该节点上时显示的提示信息。当光标位于节点 "3A" 上时,我们可以看到 A 大学 3 系接受和派遣交换生的次数:它接受学生 1 次,派遣学生 3 次。这一点也可以从上面代码片段中的 data 字典推断出来,因为 "3A" 在 Sending_Dept 列表中出现了 3 次,在 Accepting_Dept 列表中出现了 1 次。节点 "3A" 左边的数字 9 是它向 B 大学派出的交换生总数;把 Sending_Dept 列表中 "3A" 所在位置对应的 FlowValue 相加(1 + 3 + 5 = 9),同样可以得到这个结果。
import numpy as np

# Synthetic data for 10 house sales (fixed seed so the figure is reproducible).
np.random.seed(125)
num_houses = 10
distances = np.random.uniform(0, 30, num_houses)  # distance from city center
prices = np.random.uniform(400, 2000, num_houses) * 1000  # sale price in dollars (400k to 2M)
directions = np.random.choice(['N', 'S', 'E', 'W'], num_houses)  # direction from city center
agent_levels = np.random.choice([1, 2, 3], num_houses)  # agent's level


def get_emoji_size(level):
    """Return the scatter marker area for an agent level (120 for unknown levels)."""
    size_map = {1: 250, 2: 380, 3: 700}
    return size_map.get(level, 120)


def get_emoji_color_new(price):
    """Return the marker fill color for the price band that *price* falls in."""
    if price < 600000:
        return 'white'  # below $600k
    if price < 800000:
        return 'yellow'  # $600k up to $800k
    if price < 1000000:
        return 'pink'  # $800k up to $1 million
    return 'lime'  # $1 million and above


def rotate_smiley(direction):
    """Return the text rotation in degrees for a compass direction.

    Unknown directions fall back to 0 (no rotation).
    """
    rotation_map = {'N': 0, 'E': 270, 'S': 180, 'W': 90}
    return rotation_map.get(direction, 0)


def main():
    """Draw the price-vs-distance scatter plot with one rotated smiley per house."""
    # Imported here so the helpers above can be imported without a plotting backend.
    import matplotlib.pyplot as plt

    plt.figure(figsize=(12, 8))
    for distance, price, direction, level in zip(distances, prices, directions, agent_levels):
        # Colored disc: area encodes the agent level, fill encodes the price band.
        plt.scatter(distance, price, s=get_emoji_size(level),
                    c=get_emoji_color_new(price), marker='o', edgecolors='black', alpha=0.8)
        # Smiley drawn on top of the disc, rotated according to the direction.
        plt.text(distance, price, "😊", fontsize=level * 10,
                 rotation=rotate_smiley(direction), ha='center', va='center',
                 fontweight='bold')
    plt.xlabel('Distance from City Center (km)')
    plt.ylabel('Sale Price ($)')
    plt.title('House Sales Data for 10 Houses: Price vs Distance with New Color Scheme')
    plt.grid(True)
    plt.show()


if __name__ == "__main__":
    main()
如上面的代码和下面图 2 中的散点图所示,X 轴和 Y 轴分别对应于与市中心的距离和销售价格。以下是一些要点:
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

np.random.seed(0)
size_of_house = np.random.uniform(1000, 4000, 100)  # Size in square feet
# A single price-per-square-foot rate is drawn (scalar) and applied to every
# house; the per-house variation comes from the normal noise term.
price = size_of_house * np.random.uniform(150, 350) + np.random.normal(0, 50000, 100)  # Price
distance_from_train = np.random.uniform(0.5, 5, 100)  # Distance from train in miles
distance_from_ocean = np.random.uniform(0.1, 10, 100)  # Distance from ocean in miles
df = pd.DataFrame({
    'Size of House': size_of_house,
    'Price': price,
    'Distance from Train': distance_from_train,
    'Distance from Ocean': distance_from_ocean,
})

# Sample 10 points for a less cluttered plot
sampled_df = df.sample(10)

# Main axes. The snippet called `ax.inset_axes(...)` without ever creating
# `ax`, which raises NameError.
# NOTE(review): the main scatter and trend line below are reconstructed to
# mirror the inset's encodings - confirm against the intended main figure.
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    data=sampled_df, x='Size of House', y='Price', ax=ax,
    size='Distance from Train', sizes=(40, 200),
    hue='Distance from Ocean', alpha=0.7, marker='D',
)
sns.regplot(
    x='Size of House', y='Price',
    data=df,  # Using the entire dataset for the trend line
    scatter=False, color='red', ax=ax, lowess=True,
)

# Adding the inset: it reuses all sampled points; the zoom comes from the
# axis limits set at the end, not from filtering the data.
inset_data = sampled_df
inset_ax = ax.inset_axes([0.7, 0.05, 0.25, 0.25])  # [x0, y0, width, height] of the inset

# Scatter plot in the inset with the sampled data
sns.scatterplot(
    data=inset_data, x='Size of House', y='Price', ax=inset_ax,
    size='Distance from Train', sizes=(40, 200),
    hue='Distance from Ocean', alpha=0.7, marker='D', legend=False,
)

# Redraw the trend line inside the inset
sns.regplot(
    x='Size of House', y='Price',
    data=df,  # Using the entire dataset for the trend line
    scatter=False, color='red', ax=inset_ax, lowess=True,
    truncate=True,  # bounds the line by the data limits; the axis limits below do the cropping
)

# Adjusting the limits of the inset axes
inset_ax.set_xlim(2000, 3500)
inset_ax.set_ylim(600000, 1000000)

# Display the plot
plt.show()
from wordcloud import WordCloud import matplotlib.pyplot as plt
# Sample from the reviews of a book
reviews = """ 🏴☠️宝藏级🏴☠️ 原创公众号『数据STUDIO』内容超级硬核。公众号以Python为核心语言,垂直于数据科学领域,包括可戳👉 Python|MySQL|数据分析|数据可视化|机器学习与数据挖掘|爬虫 等,从入门到进阶! """
# Font file used to fix garbled Chinese characters in the rendered output.
# NOTE(review): presumably passed to WordCloud(font_path=...); that call is
# not part of this snippet - confirm where it is consumed.
font_path = "/System/Library/fonts/PingFang.ttc"
import numpy as np
import matplotlib.pyplot as plt

np.random.seed(0)  # fixed seed so the same customers are generated on every run

# randint's upper bound is exclusive, hence 61 and 1001.
ages = np.random.randint(18, 61, 20)  # Random ages between 18 and 60 for 20 customers
purchase_amounts = np.random.randint(200, 1001, 20)  # Random purchase amounts between 200 and 1000 for 20 customers

plt.figure(figsize=(10, 6))
# 5 x 5 grid of bins over the full age and purchase ranges; the color of each
# cell encodes how many customers fall into it.
plt.hist2d(ages, purchase_amounts, bins=[5, 5], range=[[18, 60], [200, 1000]], cmap='plasma')
plt.colorbar(label='Number of Customers')
plt.xlabel('Age')
plt.ylabel('Purchase Amount ($)')
plt.title('2D Histogram of Customer Age and Purchase Amount')