{
  "_id": "6a129638acfb0bcc41d0c6e6",
  "Type": "Package",
  "Package": "bdpar",
  "Title": "Big Data Preprocessing Architecture",
  "Version": "3.1.0",
  "Authors@R": "c(person(given = \"Miguel\",\nfamily = \"Ferreiro-Díaz\",\nrole = c(\"aut\",\"cre\"),\nemail = \"miguel.ferreiro.diaz@gmail.com\"),\nperson(given = \"David\",\nfamily = \"Ruano-Ordás\",\nrole = c(\"aut\",\"ctr\"),\nemail = \"drordas@uvigo.es\"),\nperson(given = \"Tomás R.\",\nfamily= \"Cotos-Yañez\",\nrole = c(\"aut\",\"ctr\"),\nemail = \"cotos@uvigo.es\"),\nperson(given = \"José Ramón\",\nfamily= \"Méndez Reboredo\",\nrole = c(\"aut\",\"ctr\"),\nemail = \"moncho.mendez@uvigo.es\"),\nperson(given = \"University of Vigo\",\nrole = c(\"cph\")))",
  "Description": "Provide a tool to easily build customized data flows to\npre-process large volumes of information from different\nsources. To this end, 'bdpar' allows to (i) easily use and\ncreate new functionalities and (ii) develop new data source\nextractors according to the user needs. Additionally, the\npackage provides by default a predefined data flow to extract\nand pre-process the most relevant information (tokens, dates,\n... ) from some textual sources (SMS, Email, YouTube comments).",
  "Date": "2023-12-11",
  "License": "GPL-3",
  "URL": "https://github.com/miferreiro/bdpar",
  "BugReports": "https://github.com/miferreiro/bdpar/issues",
  "VignetteBuilder": "knitr",
  "RoxygenNote": "7.2.3",
  "SystemRequirements": "Python (>= 2.7 or >= 3.6)",
  "Encoding": "UTF-8",
  "NeedsCompilation": "no",
  "Collate": "'AbbreviationPipe.R' 'bdpar.log.R' 'wrapper.R' 'Bdpar.R'\n'BdparOptions.R' 'Connections.R' 'ContractionPipe.R'\n'DefaultPipeline.R' 'DynamicPipeline.R' 'ExtractorEml.R'\n'ExtractorFactory.R' 'ExtractorSms.R' 'ExtractorYtbid.R'\n'File2Pipe.R' 'FindEmojiPipe.R' 'FindEmoticonPipe.R'\n'FindHashtagPipe.R' 'FindUrlPipe.R' 'FindUserNamePipe.R'\n'GenericPipe.R' 'GenericPipeline.R' 'GuessDatePipe.R'\n'GuessLanguagePipe.R' 'Instance.R' 'InterjectionPipe.R'\n'MeasureLengthPipe.R' 'ResourceHandler.R' 'SlangPipe.R'\n'StopWordPipe.R' 'StoreFileExtPipe.R' 'TargetAssigningPipe.R'\n'TeeCSVPipe.R' 'ToLowerCasePipe.R' 'bdpar.Options.R'\n'bdparData.R' 'eml.R' 'emojisData.R' 'operator-pipe.R'\n'runPipeline.R' 'zzz.R'",
  "Config/pak/sysreqs": "libxml2-dev python3",
  "Repository": "https://miferreiro.r-universe.dev",
  "Date/Publication": "2023-12-12 17:29:06 UTC",
  "RemoteUrl": "https://github.com/miferreiro/bdpar",
  "RemoteRef": "HEAD",
  "RemoteSha": "e92df857b09e83ee4f197e68577b8f486dfebf8c",
  "Packaged": {
    "Date": "2026-05-24 06:06:44 UTC",
    "User": "root"
  },
  "Author": "Miguel Ferreiro-Díaz [aut, cre],\nDavid Ruano-Ordás [aut, ctr],\nTomás R. Cotos-Yañez [aut, ctr],\nJosé Ramón Méndez Reboredo [aut, ctr],\nUniversity of Vigo [cph]",
  "Maintainer": "Miguel Ferreiro-Díaz <miguel.ferreiro.diaz@gmail.com>",
  "MD5sum": "2f9009689558205857c382376f6024bf",
  "_user": "miferreiro",
  "_type": "src",
  "_file": "bdpar_3.1.0.tar.gz",
  "_fileid": "520b211f6734952a30811cf12581f9cf70807d1ff5fcbb9b3848df09792a4c8f",
  "_filesize": 654002,
  "_sha256": "520b211f6734952a30811cf12581f9cf70807d1ff5fcbb9b3848df09792a4c8f",
  "_created": "2026-05-24T06:06:44.000Z",
  "_published": "2026-05-24T06:10:00.157Z",
  "_distro": "noble",
  "_jobs": [
    {
      "job": 77576213080,
      "time": 153,
      "config": "linux-devel-x86_64",
      "r": "4.7.0",
      "check": "OK",
      "artifact": "7182757097"
    },
    {
      "job": 77576213065,
      "time": 151,
      "config": "linux-release-x86_64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7182756812"
    },
    {
      "job": 77576213085,
      "time": 100,
      "config": "macos-oldrel-arm64",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "7182751423"
    },
    {
      "job": 77576213070,
      "time": 98,
      "config": "macos-release-arm64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7182751206"
    },
    {
      "job": 77576016794,
      "time": 203,
      "config": "source",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7182740190"
    },
    {
      "job": 77576213077,
      "time": 111,
      "config": "wasm-release",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7182752680"
    },
    {
      "job": 77576213086,
      "time": 103,
      "config": "windows-devel",
      "r": "4.7.0",
      "check": "OK",
      "artifact": "7182751727"
    },
    {
      "job": 77576213095,
      "time": 101,
      "config": "windows-oldrel",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "7182751666"
    },
    {
      "job": 77576213088,
      "time": 118,
      "config": "windows-release",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7182753319"
    }
  ],
  "_buildurl": "https://github.com/r-universe/miferreiro/actions/runs/26353561204",
  "_status": "success",
  "_host": "GitHub-Actions",
  "_upstream": "https://github.com/miferreiro/bdpar",
  "_commit": {
    "id": "e92df857b09e83ee4f197e68577b8f486dfebf8c",
    "author": "miferreiro <miguel.ferreiro.diaz@gmail.com>",
    "committer": "miferreiro <miguel.ferreiro.diaz@gmail.com>",
    "message": "Merge branch 'develop'\n\n",
    "time": 1702402146
  },
  "_maintainer": {
    "name": "Miguel Ferreiro-Díaz",
    "email": "miguel.ferreiro.diaz@gmail.com",
    "login": "miferreiro",
    "description": "",
    "uuid": 41105981
  },
  "_registered": true,
  "_dependencies": [
    {
      "package": "R",
      "version": ">= 3.5.0",
      "role": "Depends"
    },
    {
      "package": "digest",
      "role": "Imports"
    },
    {
      "package": "parallel",
      "role": "Imports"
    },
    {
      "package": "R6",
      "role": "Imports"
    },
    {
      "package": "rlist",
      "role": "Imports"
    },
    {
      "package": "tools",
      "role": "Imports"
    },
    {
      "package": "utils",
      "role": "Imports"
    },
    {
      "package": "cld2",
      "role": "Suggests"
    },
    {
      "package": "knitr",
      "role": "Suggests"
    },
    {
      "package": "rex",
      "role": "Suggests"
    },
    {
      "package": "rjson",
      "role": "Suggests"
    },
    {
      "package": "rmarkdown",
      "role": "Suggests"
    },
    {
      "package": "stringi",
      "role": "Suggests"
    },
    {
      "package": "stringr",
      "role": "Suggests"
    },
    {
      "package": "testthat",
      "version": ">= 2.3.1",
      "role": "Suggests"
    },
    {
      "package": "tuber",
      "role": "Suggests"
    }
  ],
  "_owner": "miferreiro",
  "_selfowned": true,
  "_usedby": 0,
  "_updates": [],
  "_tags": [],
  "_topics": [
    "custom-flow",
    "custom-pipes",
    "preprocessing",
    "r6"
  ],
  "_stars": 8,
  "_contributors": [
    {
      "user": "miferreiro",
      "count": 112,
      "uuid": 41105981
    }
  ],
  "_userbio": {
    "uuid": 41105981,
    "type": "user",
    "name": "Miguel"
  },
  "_downloads": {
    "count": 288,
    "source": "https://cranlogs.r-pkg.org/downloads/total/last-month/bdpar"
  },
  "_devurl": "https://github.com/miferreiro/bdpar",
  "_searchresults": 14,
  "_rbuild": "4.6.0",
  "_assets": [
    "extra/bdpar.html",
    "extra/citation.cff",
    "extra/citation.html",
    "extra/citation.json",
    "extra/citation.txt",
    "extra/contents.json",
    "extra/NEWS.html",
    "extra/NEWS.txt",
    "extra/readme.html",
    "extra/readme.md",
    "manual.pdf"
  ],
  "_homeurl": "https://github.com/miferreiro/bdpar",
  "_realowner": "miferreiro",
  "_cranurl": true,
  "_releases": [
    {
      "version": "1.0.0",
      "date": "2019-07-26"
    },
    {
      "version": "1.0.1",
      "date": "2020-01-09"
    },
    {
      "version": "2.0.0",
      "date": "2020-02-20"
    },
    {
      "version": "3.0.0",
      "date": "2020-11-25"
    },
    {
      "version": "3.0.1",
      "date": "2021-06-24"
    },
    {
      "version": "3.0.2",
      "date": "2022-05-18"
    },
    {
      "version": "3.0.3",
      "date": "2022-08-22"
    },
    {
      "version": "3.1.0",
      "date": "2023-12-12"
    }
  ],
  "_exports": [
    "%>|%",
    "AbbreviationPipe",
    "Bdpar",
    "bdpar.log",
    "bdpar.Options",
    "Connections",
    "ContractionPipe",
    "DefaultPipeline",
    "DynamicPipeline",
    "ExtractorEml",
    "ExtractorFactory",
    "ExtractorSms",
    "ExtractorYtbid",
    "File2Pipe",
    "FindEmojiPipe",
    "FindEmoticonPipe",
    "FindHashtagPipe",
    "FindUrlPipe",
    "FindUserNamePipe",
    "GenericPipe",
    "GenericPipeline",
    "GuessDatePipe",
    "GuessLanguagePipe",
    "Instance",
    "InterjectionPipe",
    "MeasureLengthPipe",
    "ResourceHandler",
    "runPipeline",
    "SlangPipe",
    "StopWordPipe",
    "StoreFileExtPipe",
    "TargetAssigningPipe",
    "TeeCSVPipe",
    "ToLowerCasePipe"
  ],
  "_datasets": [
    {
      "name": "bdparData",
      "title": "Example of the content of the files to be preprocessed.",
      "object": "bdparData",
      "file": "bdparData.rda",
      "class": [
        "data.frame"
      ],
      "fields": [
        "path",
        "source"
      ],
      "rows": 40,
      "table": true,
      "tojson": true
    },
    {
      "name": "emojisData",
      "title": "Emojis codes and descriptions data.",
      "object": "emojisData",
      "file": "emojisData.rda",
      "class": [
        "tbl_df",
        "tbl",
        "data.frame"
      ],
      "fields": [
        "code",
        "description"
      ],
      "rows": 2623,
      "table": true,
      "tojson": true
    }
  ],
  "_help": [
    {
      "page": "AbbreviationPipe",
      "title": "Class to find and/or replace the abbreviations on the data field of an Instance",
      "topics": [
        "AbbreviationPipe"
      ]
    },
    {
      "page": "Bdpar",
      "title": "Class to manage the preprocess of the files throughout the flow of pipes",
      "topics": [
        "Bdpar"
      ]
    },
    {
      "page": "bdpar.log",
      "title": "Write messages to the log at a given priority level using the custom bdpar log",
      "topics": [
        "bdpar.log"
      ]
    },
    {
      "page": "bdpar.Options",
      "title": "Object to handle the keys/attributes/options common to all pipeline flow",
      "topics": [
        "bdpar.Options"
      ]
    },
    {
      "page": "bdparData",
      "title": "Example of the content of the files to be preprocessed.",
      "topics": [
        "bdparData"
      ]
    },
    {
      "page": "Connections",
      "title": "Class to manage the connections with YouTube",
      "topics": [
        "Connections"
      ]
    },
    {
      "page": "ContractionPipe",
      "title": "Class to find and/or replace the contractions on the data field of a Instance",
      "topics": [
        "ContractionPipe"
      ]
    },
    {
      "page": "DefaultPipeline",
      "title": "Class implementing a default pipelining process.",
      "topics": [
        "DefaultPipeline"
      ]
    },
    {
      "page": "DynamicPipeline",
      "title": "Class implementing a dynamic pipelining process",
      "topics": [
        "DynamicPipeline"
      ]
    },
    {
      "page": "emojisData",
      "title": "Emojis codes and descriptions data.",
      "topics": [
        "emojisData"
      ]
    },
    {
      "page": "ExtractorEml",
      "title": "Class to handle email files with eml extension",
      "topics": [
        "ExtractorEml"
      ]
    },
    {
      "page": "ExtractorFactory",
      "title": "Class to handle the creation of Instance types",
      "topics": [
        "ExtractorFactory"
      ]
    },
    {
      "page": "ExtractorSms",
      "title": "Class to handle SMS files with tsms extension",
      "topics": [
        "ExtractorSms"
      ]
    },
    {
      "page": "ExtractorYtbid",
      "title": "Class to handle comments of YouTube files with ytbid extension",
      "topics": [
        "ExtractorYtbid"
      ]
    },
    {
      "page": "File2Pipe",
      "title": "Class to obtain the source field of an Instance",
      "topics": [
        "File2Pipe"
      ]
    },
    {
      "page": "FindEmojiPipe",
      "title": "Class to find and/or replace the emoji on the data field of an Instance",
      "topics": [
        "FindEmojiPipe"
      ]
    },
    {
      "page": "FindEmoticonPipe",
      "title": "Class to find and/or remove the emoticons on the data field of an Instance",
      "topics": [
        "FindEmoticonPipe"
      ]
    },
    {
      "page": "FindHashtagPipe",
      "title": "Class to find and/or remove the hashtags on the data field of an Instance",
      "topics": [
        "FindHashtagPipe"
      ]
    },
    {
      "page": "FindUrlPipe",
      "title": "Class to find and/or remove the URLs on the data field of an Instance",
      "topics": [
        "FindUrlPipe"
      ]
    },
    {
      "page": "FindUserNamePipe",
      "title": "Class to find and/or remove the users on the data field of an Instance",
      "topics": [
        "FindUserNamePipe"
      ]
    },
    {
      "page": "GenericPipe",
      "title": "Abstract super class that handles the management of the Pipes",
      "topics": [
        "GenericPipe"
      ]
    },
    {
      "page": "GenericPipeline",
      "title": "Abstract super class implementing the pipelining process",
      "topics": [
        "GenericPipeline"
      ]
    },
    {
      "page": "GuessDatePipe",
      "title": "Class to obtain the date field of an Instance",
      "topics": [
        "GuessDatePipe"
      ]
    },
    {
      "page": "GuessLanguagePipe",
      "title": "Class to guess the language of an Instance",
      "topics": [
        "GuessLanguagePipe"
      ]
    },
    {
      "page": "Instance",
      "title": "Abstract super class that handles the management of the Instances",
      "topics": [
        "Instance"
      ]
    },
    {
      "page": "InterjectionPipe",
      "title": "Class to find and/or remove the interjections on the data field of an Instance",
      "topics": [
        "InterjectionPipe"
      ]
    },
    {
      "page": "MeasureLengthPipe",
      "title": "Class to obtain the length of the data field of an Instance",
      "topics": [
        "MeasureLengthPipe"
      ]
    },
    {
      "page": "operator-pipe",
      "title": "bdpar customized forward-pipe operator",
      "topics": [
        "%>|%",
        "operator-pipe"
      ]
    },
    {
      "page": "ResourceHandler",
      "title": "Class that handles different types of resources",
      "topics": [
        "ResourceHandler"
      ]
    },
    {
      "page": "runPipeline",
      "title": "Initiates the pipelining process",
      "topics": [
        "runPipeline"
      ]
    },
    {
      "page": "SlangPipe",
      "title": "Class to find and/or replace the slangs on the data field of an Instance",
      "topics": [
        "SlangPipe"
      ]
    },
    {
      "page": "StopWordPipe",
      "title": "Class to find and/or remove the stop words on the data field of an Instance",
      "topics": [
        "StopWordPipe"
      ]
    },
    {
      "page": "StoreFileExtPipe",
      "title": "Class to get the file's extension field of an Instance",
      "topics": [
        "StoreFileExtPipe"
      ]
    },
    {
      "page": "TargetAssigningPipe",
      "title": "Class to get the target field of the Instance",
      "topics": [
        "TargetAssigningPipe"
      ]
    },
    {
      "page": "TeeCSVPipe",
      "title": "Class to handle a CSV with the properties field of the preprocessed Instance",
      "topics": [
        "TeeCSVPipe"
      ]
    },
    {
      "page": "ToLowerCasePipe",
      "title": "Class to convert the data field of an Instance to lower case",
      "topics": [
        "ToLowerCasePipe"
      ]
    }
  ],
  "_readme": "https://github.com/miferreiro/bdpar/raw/HEAD/README.md",
  "_rundeps": [
    "data.table",
    "digest",
    "jsonlite",
    "R6",
    "rlist",
    "XML",
    "yaml"
  ],
  "_vignettes": [
    {
      "source": "bdpar.Rmd",
      "filename": "bdpar.html",
      "title": "A Brief Introduction to bdpar",
      "author": "Miguel Ferreiro Diaz",
      "engine": "knitr::rmarkdown",
      "headings": [
        "Abstract",
        "Introduction and basics",
        "Instance",
        "Types of input data available by default",
        "How to create a customized input data extractors.",
        "Enabling a new Instance.",
        "Pipe",
        "Dependencies",
        "Pipes available by default",
        "(i) Pipes of basic functionality",
        "File2Pipe",
        "FindEmojiPipe",
        "FindEmoticonPipe",
        "FindHashtagPipe",
        "FindUrlPipe",
        "FindUserNamePipe",
        "GuessDatePipe",
        "GuessLanguagePipe",
        "MeasureLengthPipe",
        "StoreFileExtPipe",
        "TargetAssigningPipe",
        "TeeCSVPipe",
        "ToLowerCasePipe",
        "(ii) Pipes that access external files",
        "AbbreviationPipe",
        "ContractionPipe",
        "InterjectionPipe",
        "SlangPipe",
        "StopWordPipe",
        "How to create your customized Pipe",
        "Flow of Pipes (pipelining proccess)",
        "Flow of Pipes available by default",
        "Create your own flow of Pipes",
        "Operator",
        "bdpar.Options",
        "[resources]",
        "[teeCSVPipe]",
        "[youtube]",
        "[cache]",
        "[parallel]",
        "[verbose]",
        "Cache functionality",
        "Parallelization"
      ],
      "created": "2019-07-09 09:20:41",
      "modified": "2023-12-12 17:29:06",
      "commits": 6
    },
    {
      "source": "bdparExample.Rmd",
      "filename": "bdparExample.html",
      "title": "Basic example using bdpar package",
      "author": "Miguel Ferreiro Diaz",
      "engine": "knitr::rmarkdown",
      "headings": [
        "Introduction",
        "Example",
        "Visualization",
        "Execution"
      ],
      "created": "2020-02-20 09:47:40",
      "modified": "2020-11-25 19:04:59",
      "commits": 2
    },
    {
      "source": "bdparExampleImage.Rmd",
      "filename": "bdparExampleImage.html",
      "title": "Image processing example using bdpar package",
      "author": "Miguel Ferreiro Diaz",
      "engine": "knitr::rmarkdown",
      "headings": [
        "Introduction",
        "Example",
        "Preparation",
        "Extractor",
        "Creation of pipes",
        "Execution"
      ],
      "created": "2020-11-25 19:04:59",
      "modified": "2020-11-25 19:04:59",
      "commits": 1
    }
  ],
  "_score": 5.225309281725862,
  "_indexed": true,
  "_nocasepkg": "bdpar",
  "_universes": [
    "miferreiro"
  ],
  "_binaries": [
    {
      "r": "4.7.0",
      "os": "linux",
      "version": "3.1.0",
      "date": "2026-05-24T06:09:10.000Z",
      "distro": "noble",
      "commit": "e92df857b09e83ee4f197e68577b8f486dfebf8c",
      "fileid": "9bb262ac6b54135fcdd3d39801510a39a5903473e35ac5978060671cf85182a1",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/miferreiro/actions/runs/26353561204"
    },
    {
      "r": "4.6.0",
      "os": "linux",
      "version": "3.1.0",
      "date": "2026-05-24T06:09:06.000Z",
      "distro": "noble",
      "commit": "e92df857b09e83ee4f197e68577b8f486dfebf8c",
      "fileid": "e9b9418caecd881d8981e0d3a675fb9c836d57618c6cd6fdfc3476853157f7e8",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/miferreiro/actions/runs/26353561204"
    },
    {
      "r": "4.5.3",
      "os": "mac",
      "version": "3.1.0",
      "date": "2026-05-24T06:08:25.000Z",
      "commit": "e92df857b09e83ee4f197e68577b8f486dfebf8c",
      "fileid": "1d6389953b377d5ebe718c75bba376e0206f8b4b448434a4953bcf2b7f84856d",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/miferreiro/actions/runs/26353561204"
    },
    {
      "r": "4.6.0",
      "os": "mac",
      "version": "3.1.0",
      "date": "2026-05-24T06:08:24.000Z",
      "commit": "e92df857b09e83ee4f197e68577b8f486dfebf8c",
      "fileid": "44b2d703df4e2f06f7df33937c9f5980f10828a2f8ca87d0e6a986cb5fab3e8d",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/miferreiro/actions/runs/26353561204"
    },
    {
      "r": "4.6.0",
      "os": "wasm",
      "version": "3.1.0",
      "date": "2026-05-24T06:09:03.000Z",
      "commit": "e92df857b09e83ee4f197e68577b8f486dfebf8c",
      "fileid": "c3f99c0f284e4f2f03335790fe232a992006dc56229218e9c65f29457b2d752e",
      "status": "success",
      "buildurl": "https://github.com/r-universe/miferreiro/actions/runs/26353561204"
    },
    {
      "r": "4.7.0",
      "os": "win",
      "version": "3.1.0",
      "date": "2026-05-24T06:08:09.000Z",
      "commit": "e92df857b09e83ee4f197e68577b8f486dfebf8c",
      "fileid": "7a4f9a9088fb52d455fe527381ed954553429251734104e3ecab683da08ca2f7",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/miferreiro/actions/runs/26353561204"
    },
    {
      "r": "4.5.3",
      "os": "win",
      "version": "3.1.0",
      "date": "2026-05-24T06:08:10.000Z",
      "commit": "e92df857b09e83ee4f197e68577b8f486dfebf8c",
      "fileid": "be9ae0e3d0db053e51369e97de7edb1b1b4b081d6986bd9deca38e5c081f4df9",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/miferreiro/actions/runs/26353561204"
    },
    {
      "r": "4.6.0",
      "os": "win",
      "version": "3.1.0",
      "date": "2026-05-24T06:08:18.000Z",
      "commit": "e92df857b09e83ee4f197e68577b8f486dfebf8c",
      "fileid": "84c3c6c645de0f07006a3a5bd2a27aadc4641721626108a0129fb0563ca29df9",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/miferreiro/actions/runs/26353561204"
    }
  ]
}