---
# Currently there is no other option but to have daily logs, with the date in
# the file name in year-month-day format (4-digit year, 2-digit month and day).
log_name_pattern: sample_logs/counter_(yyyy-mm-dd).log

# path_types regular expressions classify page URLs as either an investigation
# or a request, based on the specific URL structure of your system.
# Dataverse note: the URL these are matched against does not include the query
# params, so dataset.xhtml\S+ will not match.
path_types:
  investigations:
    - '^.*/dataset.xhtml\S*$'
    - '^.*/file.xhtml\S*$'
    - '^.*/api/datasets\S*$'
    - '^.*/api/v1/datasets\S*$'
    # Below: historic regexes, kept for testing
    # - ^/api/datasets/[^\/]+$
    # - ^/api/versions/\d+$
    # - ^/stash/dataset/\S+$
    # - ^/stash/data_paper/\S+$
  requests:
    - '^.*/api/access/datafile\S+$'
    - '^.*/api/v1/access/datafile\S+$'
    # Below: historic regexes, kept for testing
    # - ^/api/datasets/[^\/]+/download$
    # - ^/api/versions/\d+/download$
    # - ^/api/downloads/\d+$
    # - ^/stash/downloads/download_resource/\d+$
    # - ^/stash/downloads/file_download/\d+$
    # - ^/stash/downloads/file_stream/\d+$
    # - ^/stash/downloads/async_request/\d+$
    # - ^/stash/share/\S+$

# Robots and machines URLs are URLs where the script can download a list of
# regular expressions to determine if something is a robot or machine
# user-agent. The text file has one regular expression per line.
robots_url: https://raw.githubusercontent.com/CDLUC3/Make-Data-Count/master/user-agents/lists/robot.txt
machines_url: https://raw.githubusercontent.com/CDLUC3/Make-Data-Count/master/user-agents/lists/machine.txt

# The year and month for the report you are creating. Quoted so the value is
# always loaded as a string, never as a date or number.
# year_month: '2018-05'
year_month: '2019-01'

# Don't put the filename extension, the code will tack on the tsv or json
# extension for you. Output formats are either tsv or json currently.
# TSV is currently broken until anyone accepts reports in that format.
output_file: /dataverse/sushi_sample_logs
output_format: json

# The name of the platform that goes into your reports.
platform: Dash

# Don't put your API token in here if you're going to commit it. Put it in a
# separate secrets.yaml in the same directory as the config, or else set an
# environment variable when starting up in order to override the key.
# YAML key/values set in secrets.yaml will override the ones from the main
# config.
hub_api_token: set_me_in_secrets

# The test metrics URL is only for testing.
# hub_base_url: https://metrics.test.datacite.org
hub_base_url: https://api.test.datacite.org
upload_to_hub: false

# Only use this to simulate running on a date besides today.
# simulate_date: 2018-04-02

maxmind_geoip_country_path: maxmind_geoip/GeoLite2-Country.mmdb