# Currently there is no other option but to have daily logs, with the date in
# year-month-day format in the file name (4-digit year, 2-digit month and day), e.g.
# /usr/local/payara6/glassfish/domains/domain1/logs/counter_2019-01-11.log
# log_name_pattern: sample_logs/counter_(yyyy-mm-dd).log
log_name_pattern: /usr/local/payara6/glassfish/domains/domain1/logs/mdc/counter_(yyyy-mm-dd).log

# path_types regular expressions allow matching to classify page urls as either
# an investigation or a request, based on the specific URL structure of your system.
# Dataverse Note: the url these are matched against does not include the query
# params, so dataset.xhtml\S+ will not match.
path_types:
  investigations:
    - ^.*/dataset.xhtml\S*$
    - ^.*/file.xhtml\S*$
    - ^.*/api/datasets\S*$
    - ^.*/api/v1/datasets\S*$
    ## Below historic regex for testing
    # - ^/api/datasets/[^\/]+$
    # - ^/api/versions/\d+$
    # - ^/stash/dataset/\S+$
    # - ^/stash/data_paper/\S+$
  requests:
    - ^.*/api/access/datafile\S+$
    - ^.*/api/v1/access/datafile\S+$
    ## Below historic regex for testing
    # - ^/api/datasets/[^\/]+/download$
    # - ^/api/versions/\d+/download$
    # - ^/api/downloads/\d+$
    # - ^/stash/downloads/download_resource/\d+$
    # - ^/stash/downloads/file_download/\d+$
    # - ^/stash/downloads/file_stream/\d+$
    # - ^/stash/downloads/async_request/\d+$
    # - ^/stash/share/\S+$

# Robots and machines urls are urls where the script can download a list of
# regular expressions to determine if something is a robot or machine user-agent.
# The text file has one regular expression per line.
robots_url: https://raw.githubusercontent.com/CDLUC3/Make-Data-Count/master/user-agents/lists/robot.txt
machines_url: https://raw.githubusercontent.com/CDLUC3/Make-Data-Count/master/user-agents/lists/machine.txt

# The year and month for the report you are creating.
# Quoted so the date-like value is always read as a string.
year_month: '2019-01'

# Don't put the filename extension, the code will tack on the tsv or json extension for you.
# Output formats are either tsv or json currently. TSV is currently broken until
# anyone accepts reports in that format.
# FIXME: "/tmp" is fine for a quick test but pick another directory that
# the "counter" user can write to and that the "dataverse" user can read from.
output_file: /tmp/make-data-count-report
output_format: json

# The name of the platform that goes into your reports.
# FIXME: Change "platform" to match the name of your Dataverse installation. Examples
# are "Harvard Dataverse" for Harvard University or "LibraData" for the University of Virginia.
platform: LibreScholar

# Don't put your api token in here if you're going to commit it, but put it in a
# separate secrets.yaml in the same directory as the config, or else set an
# environment variable when starting up in order to override the key.
# yaml key/values set in secrets.yaml will override the ones from the main config.
hub_api_token: set_me_in_secrets

# the test metrics is only for testing
# hub_base_url: https://metrics.test.datacite.org
# FIXME: change "hub_base_url" to https://api.datacite.org (production) once you have credentials.
hub_base_url: https://api.test.datacite.org

# FIXME: Change "upload_to_hub" to true when you're ready.
upload_to_hub: false

# only use this to simulate running on a date besides today
# simulate_date: 2019-01-12

maxmind_geoip_country_path: maxmind_geoip/GeoLite2-Country.mmdb