translation
translation
{ "af": "Kom", "en": "Come on." }
{ "af": "Nie ingebed nie", "en": "Not embedded" }
{ "af": "'n lêer genaamd% 1 alreeds bestaan.", "en": "A file named %1 already exists." }
{ "af": "@ action: button", "en": "Press this button to toggle the incidence details display." }
{ "af": "Voer in na:", "en": "Import Into:" }
{ "af": "Abstrak", "en": "Inversed Subtract" }
{ "af": "Vesnter Een Werkskerm na Regs", "en": "Window One Desktop to the Right" }
{ "af": "MIPS AssemblerLanguage", "en": "MIPS Assembler" }
{ "af": "Stooring oefening lêer", "en": "Save Training Lecture" }
{ "af": "Mexikaanse peso", "en": "Mexican Peso" }
{ "af": "Dieselfde mense wat ons wil doodmaak.", "en": "Same people who want us dead." }
{ "af": "JazzGenericName", "en": "Jazz" }
{ "af": "Toggle uitkiesing vertoon op drukker lys.", "en": "Toggle selective view on printer list" }
{ "af": "tien _BAR_ 10", "en": "ten _BAR_ 10" }
{ "af": "Skei een sel binnein twee of meer selle.", "en": "Split one cell into two or more cells" }
{ "af": "_Hou huidige ligging", "en": "_Keep Current Location" }
{ "af": "Maak oop 'n onlangse gebruik word lêer", "en": "Open Lecture File" }
{ "af": "Jy het my nie vertel dat Ludwig getroud is, en 'n kind het nie", "en": "You did not tell me that Ludwig is married and has a child." }
{ "af": "Nie moontlik na Genereer Besig om te sorteer Nommer", "en": "Unable to Generate Sorting Number" }
{ "af": "Hierdie probleem was reeds gerapporteer in die foutverslag vertoon in die webblaaier. Gaan na of enige verdere informasie bygevoeg kan word wat tot hulp van die ontwikkelaars kan wees.", "en": "This problem was already reported in the bug report displayed in the web browser. Please check if you can add any further information that might be helpful for the developers." }
{ "af": "ontwikkelaar (AdBlock filter)", "en": "Developer (AdBlock filter)" }
{ "af": "Slimkaartverifiëring", "en": "Smartcard Authentication" }
{ "af": "Terravorming groen", "en": "Terraform Green" }
{ "af": "agt", "en": "eight" }
{ "af": "Vermenigvuldig [*]", "en": "Multiply [*]" }
{ "af": "@ action: inmenu Amor", "en": "Configure ..." }
{ "af": "Onbekende karakterstel ! Verstek karakterstel is gebruik word in plaas van .", "en": "Unknown charset . Default charset is used instead ." }
{ "af": "Engels (Groot Brittanje)", "en": "English (Great Britain)" }
{ "af": "Einde van dokument bereik .", "en": "End of document reached ." }
{ "af": "herverwerk", "en": "Reset" }
{ "af": "minute", "en": "minutes" }
{ "af": "TWEE JAAR LATER", "en": "TWO YEARS LATER" }
{ "af": "Daar is hy!", "en": "There he is!" }
{ "af": "Ek luister!", "en": "I do!" }
{ "af": "Jy sal binne 2 dae kan lees en by die venster uitkyk", "en": "You'll be able to read in two days. And look out the window." }
{ "af": "Druk", "en": "Print" }
{ "af": "Op makro-ekonomiese vlak sal ons voortgaan om 'n fiskale beleid vol te hou wat voortdurende ekonomiese groei en ontwikkeling ondersteun en ons eksterne kwesbaarheid verminder.", "en": "At the macro-economic level, we will continue to maintain a fiscal posture that supports continued economic growth and development and reducing our external vulnerability." }
{ "af": "Deskjet 560c", "en": "DeskJet 560C" }
{ "af": "Antwoord aan almal", "en": "Reply to All" }
{ "af": "Sentraal Europees", "en": "Central European" }
{ "af": "Wit dame op %1$s neem swart pion op %2$s", "en": "White queen at %1$s takes the black pawn at %2$s" }
{ "af": "Tydelik gestop", "en": "Paused" }
{ "af": "Letteragtige SimboleQFont", "en": "Letterlike Symbols" }
{ "af": "Kubrick", "en": "Kubrick" }
{ "af": "Google (Vat 'n kaans) Query", "en": "Google (I'm Feeling Lucky)" }
{ "af": "Nabootsing van die antieke rekenaar (Motif weergawe)", "en": "simulation of the ancient calculator (Motif version)" }
{ "af": "en huldig tesame, die Koning van alle wyn!", "en": "His Majesty's rule is known throughout the land" }
{ "af": "rooi nar", "en": "red joker" }
{ "af": "(8) Die aanbevelings in subartikel (7) beoog, moet deur die Minister in die Nasionale", "en": "(8) The recommendations contemplated in subsection (7) must be tabled in the" }
{ "af": "Dink jy hy het my nog lief?", "en": "Do you think he still loves me?" }
{ "af": "Druk die taaklys", "en": "Print this task" }
{ "af": "Tydelik gestop", "en": "Paused" }
{ "af": "Bladsy uitleg", "en": "Sort Rows" }
{ "af": "Bly saam met hulle.", "en": "Stay with them." }
{ "af": "_Kategorieë", "en": "_Categories" }
{ "af": "Verstaan jy? Oudword is moeilik vir baie mense,", "en": "You see, aging is cumbersome to others." }
{ "af": "He_rhaal die alarm", "en": "_Repeat the reminder" }
{ "af": "Daar was 'n fout met die vertoon van die hulp", "en": "There was an error deleting “%s”." }
{ "af": "Opvolg na Nuusgroep ...", "en": "Followup to Newsgroup ..." }
{ "af": "_Kopieer", "en": "_Copy" }
{ "af": "Swart ruiter op %1$s neem wit toring op %2$s", "en": "Black knight at %1$s takes the white rook at %2$s" }
{ "af": "R4R5\" is the abbreviation for \"Register 5", "en": "R4" }
{ "af": "Horlosieformaat", "en": "Clock Format" }
{ "af": "Nommer van gate:", "en": "Number of holes" }
{ "af": "Vervris plaaslike IMAP Kas", "en": "Rebuild Local IMAP Cache" }
{ "af": "Regterwysvinger", "en": "Right ring finger" }
{ "af": "Beste _passing", "en": "Best _Fit" }
{ "af": "Nuwe slegs", "en": "New only" }
{ "af": "herverwerk", "en": "Reference" }
{ "af": "Kon nie '%s' pixmap lêer vind nie", "en": "Could not find '%s' pixmap file" }
{ "af": "Wil u definitief die huidige projek leegmaak?", "en": "Do you really want to empty the current project?" }
{ "af": "PGP-sleutel: %s", "en": "PGP Key: %s" }
{ "af": "Ek herhaal, burgervliegtuig is ontplof.", "en": "Repeat, civilian aircraft has exploded." }
{ "af": "Lêer '%s' is nie 'n gewone lêer of gids nie.", "en": "File '%s' is not a regular file or directory." }
{ "af": "_Stoor spel vir later", "en": "_Save game for later" }
{ "af": "Gui- element Opstelling", "en": "Widget Configuration" }
{ "af": "\"Ek wou met jou praat oor Karin\"", "en": "\"I've wanted to talk to you about Karin\"." }
{ "af": "As eers 'n week", "en": "Once a & week" }
{ "af": "klaweraas", "en": "ace of clubs" }
{ "af": "AdaCity name (optional, probably does not need a translation)", "en": "Corbin" }
{ "af": "Hoe is die periodieke tabel van elemente gerangskik ?", "en": "Display atomic mass in the PSE" }
{ "af": "Versper willekeurige skyf toegang", "en": "Disable arbitrary disk access" }
{ "af": "Memolys-eienskappe", "en": "Memo List Properties" }
{ "af": "Kon nie lêer '%s' oopmaak nie: %s", "en": "Failed to open file '%s': %s" }
{ "af": "Rabi` al- Awal", "en": "Rabi` al-Awal" }
{ "af": "Detail: %s", "en": "Details: %s" }
{ "af": "Domeinkomponent", "en": "Domain Component" }
{ "af": "Onttrek data vanaf % 1 ...", "en": "Retrieving from %1 ..." }
{ "af": "Afsprake en vergaderings", "en": "Appointments and Meetings" }
{ "af": "Mev die sekretaris?", "en": "Madam Secretary?" }
{ "af": "Skryf tans", "en": "Writing" }
{ "af": "Klaar!Could you not access \"Digital Persona U.are.U 4000/4000B", "en": "Done!" }
{ "af": "Klank Menger", "en": "Sound Mixer" }
{ "af": "Stoor die gekose kontakte as 'n VCard", "en": "Save selected contacts as a vCard" }
{ "af": "Alle stelsels het gewerk normaal.", "en": "Systems checked out normally." }
{ "af": "Jy spring voor 'n trein in?", "en": "You just throw yourself under a train?" }
{ "af": "Geen kieslys item: '% 1'.", "en": "No such function \"%1\"" }
{ "af": "Verwyder Opskrif", "en": "Autosave interval :" }
{ "af": "Dit sal jou goeddoen Jy sal nie meer so agressief wees nie", "en": "It will be good for you. Really. Not so aggressive next time." }
{ "af": "Verwyder van projek", "en": "Delete & Remove From Project" }

Dataset Card for OPUS-100

Dataset Summary

OPUS-100 is an English-centric multilingual corpus covering 100 languages.

OPUS-100 is English-centric, meaning that all training pairs include English on either the source or target side. The corpus covers 100 languages (including English). The languages were selected based on the volume of parallel data available in OPUS.

Supported Tasks and Leaderboards

Translation.

Languages

OPUS-100 contains approximately 55M sentence pairs. Of the 99 language pairs, 44 have 1M sentence pairs of training data, 73 have at least 100k, and 95 have at least 10k.

Dataset Structure

Data Instances

{
  "translation": {
    "ca": "El departament de bombers té el seu propi equip d'investigació.",
    "en": "Well, the fire department has its own investigative unit."
  }
}

Data Fields

  • translation (dict): Parallel sentences for the pair of languages.

Data Splits

The dataset is split into training, development, and test portions. The data was prepared by randomly sampling up to 1M sentence pairs per language pair for training and up to 2000 each for development and test. To ensure that there was no overlap (at the monolingual sentence level) between the training and development/test data, the authors applied a filter during sampling to exclude sentences that had already been sampled. Note that this was done cross-lingually so that, for instance, an English sentence in the Portuguese-English portion of the training data could not occur in the Hindi-English test set.

Dataset Creation

Curation Rationale

[More Information Needed]

Source Data

[More Information Needed]

Initial Data Collection and Normalization

[More Information Needed]

Who are the source language producers?

[More Information Needed]

Annotations

Annotation process

[More Information Needed]

Who are the annotators?

[More Information Needed]

Personal and Sensitive Information

[More Information Needed]

Considerations for Using the Data

Social Impact of Dataset

[More Information Needed]

Discussion of Biases

[More Information Needed]

Other Known Limitations

[More Information Needed]

Additional Information

Dataset Curators

[More Information Needed]

Licensing Information

[More Information Needed]

Citation Information

If you use this corpus, please cite the paper:

@inproceedings{zhang-etal-2020-improving,
    title = "Improving Massively Multilingual Neural Machine Translation and Zero-Shot Translation",
    author = "Zhang, Biao  and
      Williams, Philip  and
      Titov, Ivan  and
      Sennrich, Rico",
    editor = "Jurafsky, Dan  and
      Chai, Joyce  and
      Schluter, Natalie  and
      Tetreault, Joel",
    booktitle = "Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics",
    month = jul,
    year = "2020",
    address = "Online",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2020.acl-main.148",
    doi = "10.18653/v1/2020.acl-main.148",
    pages = "1628--1639",
}

Please also acknowledge OPUS:

@inproceedings{tiedemann-2012-parallel,
    title = "Parallel Data, Tools and Interfaces in {OPUS}",
    author = {Tiedemann, J{\"o}rg},
    editor = "Calzolari, Nicoletta  and
      Choukri, Khalid  and
      Declerck, Thierry  and
      Do{\u{g}}an, Mehmet U{\u{g}}ur  and
      Maegaard, Bente  and
      Mariani, Joseph  and
      Moreno, Asuncion  and
      Odijk, Jan  and
      Piperidis, Stelios",
    booktitle = "Proceedings of the Eighth International Conference on Language Resources and Evaluation ({LREC}'12)",
    month = may,
    year = "2012",
    address = "Istanbul, Turkey",
    publisher = "European Language Resources Association (ELRA)",
    url = "http://www.lrec-conf.org/proceedings/lrec2012/pdf/463_Paper.pdf",
    pages = "2214--2218",
}

Contributions

Thanks to @vasudevgupta7 for adding this dataset.

Downloads last month
1,673

Models trained or fine-tuned on Helsinki-NLP/opus-100

Collection including Helsinki-NLP/opus-100