Skip to content

Instantly share code, notes, and snippets.

@GoingMyWay
Created January 29, 2021 02:47
Show Gist options
  • Select an option

  • Save GoingMyWay/3782c11a541725763fc26082782a5973 to your computer and use it in GitHub Desktop.

Select an option

Save GoingMyWay/3782c11a541725763fc26082782a5973 to your computer and use it in GitHub Desktop.
Download ICLR 2021 papers
# Download the PDF of every paper listed in iclr_papers.csv into ./papers/.
# Each row is read as "title,url": everything before the first comma is the
# title, the rest is the URL. The first "forum" in the URL is replaced by
# "pdf" to obtain the download link.
# NOTE(review): a title that itself contains a comma will be split in the
# wrong place -- confirm how iclr_papers.csv is produced.
secondString="pdf"
max_jobs="${MAX_JOBS:-8}"   # upper bound on simultaneous wget processes
unsafe='[:?^/]'             # characters replaced by "_" in output file names

mkdir -p ./papers

# "|| [ -n "$title" ]" keeps a final row that lacks a trailing newline.
while IFS="," read -r title url || [ -n "$title" ]; do
    nurl=${url/forum/$secondString}
    nurl=${nurl//$'\r'/}            # strip carriage returns from CRLF files
    [ -n "$nurl" ] || continue      # skip blank or malformed rows
    ntitle=${title//$unsafe/_}

    # Throttle: wait until a download slot is free instead of forking one
    # wget per CSV row all at once.
    while (( $(jobs -rp | wc -l) >= max_jobs )); do
        sleep 0.2
    done

    # The URL is quoted: it contains "?", which the shell would otherwise
    # treat as a glob character.
    wget -O "./papers/21-${ntitle}.pdf" "$nurl" &
done < iclr_papers.csv

# Do not return before the background downloads have finished.
wait
@GoingMyWay
Copy link
Copy Markdown
Author

GoingMyWay commented Nov 11, 2021

The code for downloading the paper data (saved as `iclr_list.py`):

import argparse
import multiprocessing

import tqdm
import openreview
import pandas as pd


def worker(reviews):
    """Collect title, link, ratings, keywords and comment count for each note.

    Opens its own OpenReview client and, for every note in *reviews*, fetches
    all notes posted in that note's forum.

    Args:
        reviews: iterable of submission notes; each must expose ``id`` and a
            ``content`` mapping that contains ``'title'``.

    Returns:
        dict mapping each of ``'title'``, ``'link'``, ``'rating'``,
        ``'avg_rating'``, ``'keywords'`` and ``'n_comments'`` to a list with
        one entry per note, in input order. ``'avg_rating'`` is NaN for a
        note whose forum has no parsable rating.
    """
    client = openreview.Client(baseurl='https://api.openreview.net', username='', password='')
    papers = {'title': [], 'link': [], 'rating': [], 'avg_rating': [], 'keywords': [], 'n_comments': []}
    for review in tqdm.tqdm(reviews):
        _id = review.id
        _title = review.content['title']
        _comments = client.get_notes(forum=_id)
        # Tolerate submissions without a keywords field.
        _keywords = review.content.get('keywords') or []
        _ratings = []

        for c in _comments:
            raw_rating = c.content.get('rating')
            if raw_rating is None:
                continue
            # Assumes ratings look like "6: Marginally above ..." (TODO confirm
            # for other venues). Parse everything before the colon so that
            # two-digit scores such as "10: ..." are not truncated to 1.
            try:
                _ratings.append(int(str(raw_rating).split(':', 1)[0]))
            except ValueError:
                # Not a numeric rating; leave it out of the statistics.
                continue

        papers['title'].append(_title)
        papers['link'].append(f'https://openreview.net/forum?id={_id}')
        papers['rating'].append(_ratings)
        # A forum without any rating would otherwise divide by zero.
        papers['avg_rating'].append(sum(_ratings) / len(_ratings) if _ratings else float('nan'))
        papers['keywords'].append([v.lower() for v in _keywords])
        papers['n_comments'].append(len(_comments))
    return papers


def _split_evenly(items, n_chunks):
    """Return exactly *n_chunks* contiguous slices that together cover *items*.

    Slice lengths differ by at most one; slices are empty when there are
    fewer items than chunks.
    """
    base, extra = divmod(len(items), n_chunks)
    chunks = []
    start = 0
    for i in range(n_chunks):
        # The first `extra` chunks take one additional item each.
        stop = start + base + (1 if i < extra else 0)
        chunks.append(items[start:stop])
        start = stop
    return chunks


def main(args):
    """Fetch all notes for ``args.conf`` and write their stats to a CSV file.

    The notes are split into ``args.n_runner`` chunks, each processed by
    ``worker`` in its own process; the per-chunk results are concatenated and
    saved to ``iclr_2021_list.csv``.

    Args:
        args: parsed command-line namespace providing ``conf`` (the
            invitation passed to OpenReview) and ``n_runner`` (the number of
            worker processes).
    """
    client = openreview.Client(baseurl='https://api.openreview.net', username='', password='')
    blind_submissions_iterator = openreview.tools.iterget_notes(client, invitation=args.conf)
    all_reviews = list(blind_submissions_iterator)

    # Every note must land in exactly one chunk, including the remainder
    # left over when the total is not a multiple of n_runner.
    chunks = _split_evenly(all_reviews, args.n_runner)

    # The context manager releases the pool even if a worker raises.
    with multiprocessing.Pool(processes=args.n_runner) as p:
        data = p.map(worker, chunks)
    print(len(data))

    all_data = {}
    print('saving data....')
    for d in tqdm.tqdm(data):
        for k, v in d.items():
            # Extend a fresh list so no worker's result is mutated in place.
            all_data.setdefault(k, []).extend(v)

    df = pd.DataFrame.from_dict(all_data)
    df.to_csv('iclr_2021_list.csv')


if __name__ == '__main__':
    # Script entry point: build the command-line interface, parse it and
    # hand the resulting namespace to main().
    cli = argparse.ArgumentParser(description='ICLR data parser')
    # Invitation id of the venue to scrape; the id of another venue
    # (e.g. a NeurIPS one) can be supplied instead of the default.
    cli.add_argument(
        '--conf',
        type=str,
        default='ICLR.cc/2021/Conference/-/Blind_Submission',
        help='conference link',
    )
    # NOTE: main() passes this value to multiprocessing.Pool, so it is a
    # process count even though the help text says "threads".
    cli.add_argument('--n-runner', type=int, default=32, help='number of threads')

    main(cli.parse_args())

python iclr_list.py

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment