-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathsplit.py
More file actions
19 lines (17 loc) · 799 Bytes
/
split.py
File metadata and controls
19 lines (17 loc) · 799 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
import argparse
import random
def split(filename, treshold):
    """Randomly split the lines of a text file into train and validate files.

    Every line of ``filename`` is written to exactly one of two files,
    ``filename + '_train'`` or ``filename + '_validate'``. The choice is made
    independently for each line with ``random.random()``, so the resulting
    proportions are approximate and differ between runs unless the caller
    seeds the ``random`` module beforehand.

    Args:
        filename: Path of the input file. The two output paths are built by
            appending ``'_train'`` and ``'_validate'`` to it; both are opened
            for writing, which replaces any existing content.
        treshold: Split fraction within [0, 1]. The larger of ``treshold``
            and ``1 - treshold`` is used as the probability that a line goes
            to the training file, so ``0.2`` and ``0.8`` are equivalent.

    Raises:
        ValueError: If ``treshold`` is outside [0, 1] (NaN included).
        OSError: If the input cannot be opened or an output cannot be written.
    """
    # Validate before opening anything so that a bad argument never creates
    # or truncates the output files. The chained comparison is also False
    # for NaN, which would otherwise send every line to the training file.
    if not 0 <= treshold <= 1:
        raise ValueError(
            'treshold must be between 0 and 1, got %r' % (treshold,)
        )

    # The training set is always the bigger share.
    train_fraction = max(treshold, 1 - treshold)

    # newline='' turns off newline translation for reading and writing, so
    # each line keeps its original terminator ('\n', '\r\n' or '\r').
    with open(filename, 'r', newline='') as infile, \
            open(filename + '_train', 'w', newline='') as train, \
            open(filename + '_validate', 'w', newline='') as validate:
        for line in infile:
            if random.random() > train_fraction:
                validate.write(line)
            else:
                train.write(line)
if __name__ == '__main__':
    # Command-line entry point: read the input path and the split fraction,
    # then hand both to split().
    parser = argparse.ArgumentParser()
    parser.add_argument('input', help='path to the input file', type=str)
    parser.add_argument(
        '-t', '--treshold',
        help='between 0 and 1, the training set will always be the bigger one, i.e. 0.2 and 0.8 are equivalent',
        type=float,
        default=0.8,
    )
    args = parser.parse_args()

    # Enforce the range promised by the help text. The chained comparison is
    # False for NaN as well, and parser.error() prints the usage message and
    # exits with status 2 instead of silently producing a lopsided split.
    if not 0 <= args.treshold <= 1:
        parser.error(
            'argument -t/--treshold: must be between 0 and 1, got %r'
            % (args.treshold,)
        )

    split(args.input, args.treshold)