import os

# Collect every line that contains '@' from all files in ./data, then print
# the collected lines in sorted order.
path = './data'
emails = []
for name in os.listdir(path):
    # `with` closes the handle even if reading fails part-way through, and a
    # separate name keeps the directory entry from being overwritten by the
    # file object.
    with open(os.path.join(path, name), 'r') as handle:
        emails.extend(line.rstrip('\n') for line in handle if '@' in line)
emails.sort()
print(emails)
# Create (or overwrite) week4.txt with three lines.
with open('week4.txt', 'w') as file:
    file.write("This is the first line\n")
    file.write("This is the second line\n")
    file.write("The end\n")

# Append new content to the existing file.
with open('week4.txt', 'a') as file:
    file.write("Extra line added\n")
Q4 Get current directory
1 2
import os

# Q4: current working directory. As a bare expression this only displays a
# value in an interactive session / notebook cell.
os.getcwd()
1
'C:\\Users\\f2401539\\Desktop'
Q5
1 2 3 4 5 6
# Q5: print the first 18 lines of districts.txt in sorted order.
# os.path.join replaces the hand-written '\districts.txt', which relied on an
# invalid escape sequence and a Windows-only separator.
with open(os.path.join(os.getcwd(), 'districts.txt'), 'r') as file:
    districts = sorted(file)

# Slicing (instead of indexing 0..17) does not raise when the file holds
# fewer than 18 lines.
for district in districts[:18]:
    # Each line keeps its trailing newline, exactly as it was printed before.
    print(district)
import csv

# Header row followed by one row per course.
courses = [
    ['Course Code', 'Year', 'Semester', 'Course Name'],
    ['COMP7035', '2022-23', 'Sem A',
     'Python for Data Analytics and Artificial Intelligence'],
    ['COMP1007', '2021-22', 'Sem B',
     'Introduction to Python and Its Applications'],
]

# newline='' hands line-ending control to the csv module; without it each
# record is followed by an empty row when the file is read back on Windows.
with open('courses.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerows(courses)
1 2 3 4 5 6 7 8
import csv

# Read courses.csv back and print every row as a list of strings.
# newline='' is the mode the csv module expects for files it parses.
with open('courses.csv', 'r', newline='') as f:
    reader = csv.reader(f)
    for row in reader:
        print(row)
1 2 3 4 5 6
['Course Code', 'Year', 'Semester', 'Course Name'] [] ['COMP7035', '2022-23', 'Sem A', 'Python for Data Analytics and Artificial Intelligence'] [] ['COMP1007', '2021-22', 'Sem B', 'Introduction to Python and Its Applications'] []
Lab5 Numpy
create an array of the integers from 20 to 50
1 2 3
import numpy as np

# np.arange excludes the stop value, so stop=51 is what includes 50.
array = np.arange(20, 51)
print(array)
import pandas as pd

# Load elderly.csv and pull two of its columns out as plain Python lists.
# read_csv lives in the pandas namespace; the bare name was undefined.
df = pd.read_csv('elderly.csv')

year = df['Year'].values.tolist()
print(year)

sixtyFiveAbove = df['65 years old and above'].values.tolist()
print(sixtyFiveAbove)
8. Write down as many ways as you can of forming a list that contains the values of the Series elements
1 2 3 4 5 6 7 8
# Several ways of collecting the values of pd_series_dict.

# 1. Built-in conversion.
val_1 = pd_series_dict.to_list()

# 2. Walk the (index, value) pairs. Series.iteritems() was removed in
#    pandas 2.0; items() is the supported spelling.
val_2 = [ival for _, ival in pd_series_dict.items()]

# 3. The underlying values.
# NOTE(review): this is a NumPy array, not a list; wrap it in list() or call
# .tolist() if a real list is required.
val_3 = pd_series_dict.values

# 4. Look every element up by its index label.
val_tmp = pd_series_dict.index
val_4 = [pd_series_dict[ikey] for ikey in val_tmp]
9. Calculate the proportion of elements that are larger than the mean value of the Series
Subset of the above Data Series: 11 11 12 12 13 13 14 14 15 15 dtype: int64
Lab9 Pandas_2
1 2
import numpy as np
import pandas as pd
1. Write code to create two DataFrames, df_left and df_right, with the columns “[key, lval1, lval2]” and “[key, rval1, rval2]”, whose key values are “[a,b,c]” and “[b,c,d]” respectively. Generate normally distributed random numbers for the “lval” and “rval” elements
key lval1 lval2 0 a -0.306740 0.370246 1 b -1.633727 -0.351369 2 c 1.558975 -0.179692 --------- key rval1 rval2 0 b -0.036699 0.724182 1 c -1.241680 -1.695795 2 d 1.580775 -1.271330
2. Compute the left outer join of df_left and df_right, check out the results
1 2
# Step 2: left outer join. Every row of left_df is kept; right-hand columns
# are NaN where the right frame has no matching key. No `on=` is given, so
# pandas joins on the column names the two frames share.
left_merge = pd.merge(left_df, right_df, how='left')
print(left_merge)
1 2 3 4
key lval1 lval2 rval1 rval2 0 a -0.306740 0.370246 NaN NaN 1 b -1.633727 -0.351369 -0.036699 0.724182 2 c 1.558975 -0.179692 -1.241680 -1.695795
3. Change the name “key” of df_left to “key_left”, re-run step 2 and see what happens
1
# Step 3: relabel the columns of left_df so the join column is "key_left".
new_column_names = ['key_left', 'lval1', 'lval2']
left_df.columns = new_column_names
4. Compute the right outer join of df_left and df_right in step 2, check out the results
1 2
# Step 4: right outer join. Every row of right_df is kept; left-hand columns
# are NaN where the left frame has no matching key.
# NOTE(review): relies on the two frames sharing a column name to join on.
right_merge = pd.merge(left_df, right_df, how='right')
print(right_merge)
1 2 3 4
key lval1 lval2 rval1 rval2 0 b -1.633727 -0.351369 -0.036699 0.724182 1 c 1.558975 -0.179692 -1.241680 -1.695795 2 d NaN NaN 1.580775 -1.271330
5. Compute the full outer join of df_left and df_right in step 2, check out the results
1 2
# Step 5: full outer join. Rows from both frames are kept; columns from the
# side without a matching key are NaN.
# NOTE(review): relies on the two frames sharing a column name to join on.
outer_merge = pd.merge(left_df, right_df, how='outer')
print(outer_merge)
1 2 3 4 5
key lval1 lval2 rval1 rval2 0 a -0.306740 0.370246 NaN NaN 1 b -1.633727 -0.351369 -0.036699 0.724182 2 c 1.558975 -0.179692 -1.241680 -1.695795 3 d NaN NaN 1.580775 -1.271330
6. Compute the inner join of df_left and df_right in step 2, check out the results
1 2
# Step 6: inner join. Only keys present in both frames survive.
# NOTE(review): relies on the two frames sharing a column name to join on.
inner_merge = pd.merge(left_df, right_df, how='inner')
print(inner_merge)
1 2 3
key lval1 lval2 rval1 rval2 0 b -1.633727 -0.351369 -0.036699 0.724182 1 c 1.558975 -0.179692 -1.241680 -1.695795
7. Get the floating value columns of df_left (lval1,lval2), get the square root of the absolute values using apply
10. Get the “Countries and dependencies by area” data from Wikipedia and save it to an Excel file, excluding the index
1 2 3 4 5 6 7 8 9
from io import StringIO

import requests

# Download the Wikipedia page, parse its HTML tables and save one to Excel.
url_wiki = 'https://en.wikipedia.org/wiki/List_of_countries_and_dependencies_by_area'
# Browser-like User-Agent sent with the request. The spaces that had been
# lost after "10.0;" and before "Safari/" are restored so the header is
# well formed.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                  'AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/91.0.4472.124 Safari/537.36'
}
# A timeout keeps the script from hanging forever on a stalled connection;
# raise_for_status stops an error page from being parsed as if it were data.
r = requests.get(url_wiki, headers=headers, timeout=30)
r.raise_for_status()

# Passing literal HTML text straight to read_html is deprecated in pandas;
# wrapping it in StringIO is the supported form.
data = pd.read_html(StringIO(r.text))
print([idata.shape for idata in data])

# NOTE(review): index 1 is assumed to be the area table; confirm against the
# shapes printed above, since the page layout can change.
data_area = data[1]
print(data_area)

# index=False leaves the DataFrame index out of the spreadsheet.
data_area.to_excel('area_info.xlsx', index=False)