-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathget_train.py
More file actions
46 lines (38 loc) · 1.19 KB
/
Copy pathget_train.py
File metadata and controls
46 lines (38 loc) · 1.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import os
import pandas as pd
import requests
from bs4 import BeautifulSoup
# Root directory whose Python sources are harvested.
sys_path = '/home/anurag/.local/lib/python3.5/site-packages'
# Upper bound on the number of source files collected. The previous check
# (`count > 3000`) let a 3001st file through, producing a stray 1-file shard.
MAX_FILES = 3000
# Number of source files concatenated into each input_<n>.txt shard.
FILES_PER_SHARD = 100


def iter_python_files(root):
    """Yield the full path of every file below *root* whose name ends in ``.py``.

    A suffix test is used instead of a substring test so that names such as
    ``x.pyc``, ``x.pyi`` or ``x.py.bak`` are not picked up. Directories and
    files are visited in sorted order so repeated runs see the same sequence.
    """
    for path, dirs, files in os.walk(root):
        # Sorting in place steers the order in which os.walk descends.
        dirs.sort()
        for file in sorted(files):
            if file.endswith('.py'):
                yield os.path.join(path, file)


def collect_sources(root, limit=MAX_FILES):
    """Return the text of up to *limit* readable ``.py`` files below *root*.

    Files that cannot be opened or decoded as UTF-8 are skipped and do not
    count towards *limit*.
    """
    sources = []
    for source_path in iter_python_files(root):
        if len(sources) >= limit:
            # Leave the whole walk; there is nothing more to collect.
            break
        try:
            with open(source_path, 'r', encoding="utf-8") as data:
                sources.append(data.read())
        except (OSError, UnicodeDecodeError):
            # Best effort: an unreadable file is skipped, not fatal.
            continue
    return sources


def write_shards(sources, per_shard=FILES_PER_SHARD, out_dir=''):
    """Write *sources* to ``input_<n>.txt`` shards and return the shard texts.

    Each shard holds up to *per_shard* sources, every one followed by a
    newline. Shards are created in *out_dir* (the current directory when
    empty) and are overwritten, so rerunning does not duplicate content.
    The returned list has one string per shard, in shard order.
    """
    shard_texts = []
    for start in range(0, len(sources), per_shard):
        text = ''.join(src + '\n' for src in sources[start:start + per_shard])
        shard_name = os.path.join(out_dir, f'input_{len(shard_texts)}.txt')
        with open(shard_name, 'w', encoding="utf-8") as f:
            f.write(text)
        shard_texts.append(text)
    return shard_texts


# Phase 1: harvest sources and write the shard files. The shard texts are
# kept in memory so the CSV below covers exactly the shards produced by this
# run, however many there are.
contents = write_shards(collect_sources(sys_path))

# Phase 2: report how many .py files exist in total (not capped by MAX_FILES).
count = sum(1 for _ in iter_python_files(sys_path))
print(count)

# Phase 3: one CSV row per shard; the scalar False fills the 'valid' column.
d = {'text': contents, 'valid': False}
df = pd.DataFrame(data=d)
df.to_csv('sys_code_final.csv', index=False)