PUBMED First Authors efetch -db pubmed -id 6271474,5685784,4882854,6243420 -format xml | xtract -pattern PubmedArticle -element MedlineCitation/PMID "#Author" \ -block Author -position first -sep " " -element Initials,LastName \ -block Article -element ArticleTitle 6271474 5 MJ Casadaban Tn3: transposition and control. 5685784 2 RK Mortimer Suppressors and suppressible mutations in yeast. 4882854 2 ED Garber Proteins and enzymes as taxonomic tools. 6243420 1 NR Cozzarelli DNA gyrase and the supercoiling of DNA. Formatted Authors efetch -db pubmed -id 1413997,6301692,781293 -format xml | xtract -pattern PubmedArticle -element MedlineCitation/PMID \ -block PubDate -sep "-" -element Year,Month,MedlineDate \ -block Author -sep " " -tab "" \ -element "&COM" Initials,LastName -COM "(|)" | perl -pe 's/(\t[^\t|]*)\|([^\t|]*)$/$1 and $2/; s/\|([^|]*)$/, and $1/; s/\|/, /g' 1413997 1992-Oct RK Mortimer, CR Contopoulou, and JS King 6301692 1983-Apr MA Krasnow and NR Cozzarelli 781293 1976-Jul MJ Casadaban Medical Subject Headings efetch -db pubmed -id 6092233,2539356,1937004 -format xml | xtract -pattern PubmedArticle -element MedlineCitation/PMID \ -block MeshHeading \ -subset DescriptorName -pfc "\n" -sep "|" -element @MajorTopicYN,DescriptorName \ -subset QualifierName -pfc " / " -sep "|" -element @MajorTopicYN,QualifierName | sed -e 's/N|//g' -e 's/Y|/*/g' 6092233 Base Sequence DNA Restriction Enzymes DNA, Fungal / genetics / *isolation & purification *Genes, Fungal ... 
# Book Authors and Editors

efetch -db pubmed -id 21433338 -format xml |
xtract -pattern PubmedBookArticle \
  -path BookDocument.AuthorList.Author -element LastName \
  -path BookDocument.Book.AuthorList.Author -element LastName

# Output shown in the original:
#   Fauci Desrosiers Coffin Hughes Varmus

# Heterogeneous Data

efetch -db pubmed -id 21433338,17247418 -format xml |
xtract -pattern "PubmedArticleSet/*" \
  -group "Book/AuthorList" -element LastName \
  -group "Article/AuthorList" -element LastName

# Output shown in the original:
#   Coffin Hughes Varmus Lederberg Cavalli Lederberg

# Multiple Links

esearch -db pubmed -query "conotoxin AND dopamine [MAJR]" |
elink -target protein -cmd neighbor |
xtract -pattern LinkSet -if Link/Id -element IdList/Id Link/Id

# Output shown in the original:
#   28666811 17105332 9506485 23624852 17105332 14657161 27532980 27532978 19424304 12944511 31542395 17105332

# Markup Correction

for id in 8475897 8988608 9698410 10194376 15949988 16271163 17282049 \
  19793852 20968289 21892341 22106757 22785267 22360335 23095895 23095897 \
  25435818 26433210 27672066 28635620 28976125 29547395 29869631 29869640
do
  efetch -db pubmed -format xml -id "$id" |
  xtract -pattern PubmedArticle -plg "\n\n" -sep "\n\n" -tab "\n\n" \
    -element MedlineCitation/PMID ArticleTitle Abstract/AbstractText
done

# Record Counts

# Each word becomes "$0" of the inner sh -c script, one invocation per word.
echo "diphtheria measles pertussis polio tuberculosis" |
xargs -n 1 sh -c 'esearch -db pubmed -query "$0 [MESH]" |
efilter -days 365 -datetype PDAT |
xtract -pattern ENTREZ_DIRECT -lbl "$0" -element Count'

# Output shown in the original:
#   diphtheria 33
#   measles 150
#   pertussis 85
#   polio 100
#   tuberculosis 1608

# Elink -cited Equivalent

# elink_cited: turn the record set arriving on stdin into a posted PubMed set
# built from the "cited_by" values that the iCite service returns for it.
#
# Pipeline, stage by stage:
#   1. efetch -format uid            - read the incoming set as one id per line
#   2. sort -n | uniq                - numeric order, duplicates removed
#   3. join-into-groups-of 100       - batches of ids, one batch per line
#   4. nquire -get .../api/pubs      - query iCite once per batch (-pmids)
#   5. xtract -j2x | xtract ...      - convert the JSON reply to XML and pull
#                                      out the cited_by elements
#   6. word-at-a-time | sort | uniq  - one value per line, de-duplicated
#   7. epost -db pubmed              - post the resulting ids
#
# Stdin:  whatever "efetch -format uid" accepts (esearch output in the example).
# Stdout: whatever "epost -db pubmed" prints.
# NOTE(review): cited_by presumably holds the PMIDs of citing papers - confirm
# against the iCite API documentation.
elink_cited() {
  local ids
  efetch -format uid | sort -n | uniq | join-into-groups-of 100 |
  while read -r ids
  do
    # An empty batch would issue a query with no -pmids value; skip it.
    [ -n "$ids" ] || continue
    nquire -get https://icite.od.nih.gov/api/pubs -pmids "$ids" |
    xtract -j2x | xtract -pattern opt -element cited_by
  done |
  word-at-a-time | sort -n | uniq | epost -db pubmed
}

esearch -db pubmed -query "Beadle GW [AUTH] AND Tatum EL [AUTH]" |
elink_cited |
efetch -format abstract

# PMC Formatting Tag Removal
# (the command below continues on the following line of the file)

efetch -db pmc -id 4729119 -format xml | xtract -mixed -pattern
article -group p \ -position first -tab "\n\n" -element p -plain p | fold -w 70 -s | awk '{$1=$1};1' The intestinal cells of Caenorhabditis elegans are filled with heterogeneous granular organelles that are associated with specific organ functions. The best studied of these organelles ... The intestinal cells of Caenorhabditis elegans are filled with heterogeneous granular organelles that are associated with specific organ functions. The best studied of these organelles are lipid ... SEQUENCE Peptide Sequences esearch -db protein -query "conotoxin AND mat_peptide [FKEY]" | efetch -format gpc | xtract -insd complete mat_peptide "%peptide" product mol_wt peptide | grep -i conotoxin | sort -t $'\t' -u -k 2,2n | head -n 8 ADB43131.1 15 conotoxin Cal 1b 1708 LCCKRHHGCHPCGRT ADB43128.1 16 conotoxin Cal 5.1 1829 DPAPCCQHPIETCCRR AIC77105.1 17 conotoxin Lt1.4 1705 GCCSHPACDVNNPDICG ADB43129.1 18 conotoxin Cal 5.2 2008 MIQRSQCCAVKKNCCHVG ADD97803.1 20 conotoxin Cal 1.2 2206 AGCCPTIMYKTGACRTNRCR AIC77085.1 21 conotoxin Bt14.8 2574 NECDNCMRSFCSMIYEKCRLK ADB43125.1 22 conotoxin Cal 14.2 2157 GCPADCPNTCDSSNKCSPGFPG AIC77154.1 23 conotoxin Bt14.19 2578 VREKDCPPHPVPGMHKCVCLKTC Vitamin Biosynthesis esearch -db pubmed -query "lycopene cyclase" | elink -related | elink -target protein | efilter -organism rodents -source refseq | efetch -format docsum | xtract -pattern DocumentSummary -element AccessionVersion Title | grep -i carotene NP_001346539.1 beta,beta-carotene 9',10'-oxygenase isoform 2 [Mus musculus] NP_573480.1 beta,beta-carotene 9',10'-oxygenase isoform 1 [Mus musculus] NP_446100.2 beta,beta-carotene 15,15'-dioxygenase [Rattus norvegicus] NP_001121184.1 beta,beta-carotene 9',10'-oxygenase [Rattus norvegicus] NP_001156500.1 beta,beta-carotene 15,15'-dioxygenase isoform 2 [Mus musculus] NP_067461.2 beta,beta-carotene 15,15'-dioxygenase isoform 1 [Mus musculus] Coding Sequences efetch -db nucleotide -id J01636.1 -format gbc -style withparts | xtract -insd CDS gene 
sub_sequence
# ("sub_sequence" above is the last argument of the xtract -insd command that
# begins on the preceding line of the file.)

# Output shown in the original:
#   J01636.1 lacI GTGAAACCAGTAACGTTATACGATGTCGCAGAGTATGCCG...
#   J01636.1 lacZ ATGACCATGATTACGGATTCACTGGCCGTCGTTTTACAAC...
#   J01636.1 lacY ATGTACTATTTAAAAAACACAAACTTTTGGATGTTCGGTT...
#   J01636.1 lacA TTGAACATGCCAATGACCGAAAGAATAAGAGCAGGCAAGC...

# Sequence Subregion

# The bracketed range is quoted so the shell cannot treat [2881:1] as a glob.
efetch -db nuccore -id U54469 -format gbc |
xtract -pattern INSDSeq -nucleic "INSDSeq_sequence[2881:1]" |
fold -w 60

# Output shown in the original:
#   CCGGTTTTAATGTAGGTTTTTATTAATATACTTTTCCGTCTAATCCATTATTGACAGTGA
#   CTACAAAAAGCGGATAGATTTTATATTATGCCGATTTTTGATAACAAAGGGGGTTCCGTT
#   TCGGTTTCGTTACGCGGGTCTTAGACAATAGTCACGATTAATCGCTACTGTTGCTTATAA
#   ...

# 3'UTR Sequences

#!/bin/bash -norc

# ThreePrimeUTRs: read INSDSeq XML on stdin and print, per CDS feature, the
# part of the record's sequence that follows the last INSDInterval_to value.
#
# xtract emits one row per CDS: accession.version, last interval end, sequence.
# For each row:
#   - end < sequence length : a ">ACC 3'UTR: START..END" header followed by
#                             the trailing sequence folded at 50 columns
#   - otherwise             : a ">ACC NO 3'UTR" header only
#
# Stdin:  INSDSeq XML (efetch -format gbc in the example below).
# Stdout: FASTA-like text as described above.
ThreePrimeUTRs() {
  local acc pos seq
  xtract -pattern INSDSeq -ACC INSDSeq_accession-version -SEQ INSDSeq_sequence \
    -block INSDFeature -if INSDFeature_key -equals CDS \
      -pfc "\n" -element "&ACC" -rst -last INSDInterval_to -element "&SEQ" |
  while read -r acc pos seq
  do
    # -pfc "\n" puts a newline in front of every row, so the stream starts
    # with a blank line; skip it (and any row without a numeric position)
    # instead of feeding an empty operand to the numeric test.
    case "$pos" in
      '' | *[!0-9]* ) continue ;;
    esac
    if [ "$pos" -lt "${#seq}" ]
    then
      printf ">%s 3'UTR: %s..%s\n" "$acc" "$((pos+1))" "${#seq}"
      printf '%s\n' "${seq:$pos}" | fold -w 50
    else
      printf ">%s NO 3'UTR\n" "$acc"
    fi
  done
}

esearch -db nuccore -query "5.5.1.19 [ECNO]" |
efilter -molecule mrna -source refseq |
efetch -format gbc |
ThreePrimeUTRs

# Output shown in the original:
#   >NM_001328461.1 3'UTR: 1737..1871
#   gatgaatatagagttactgtgttgtaagctaatcatcatactgatgcaag
#   tgcattatcacatttacttctgctgatgattgttcataagattatgagtt
#   agccatttatcaaaaaaaaaaaaaaaaaaaaaaaa
#   >NM_001316759.1 3'UTR: 1628..1690
#   atccgagtaattcggaatcttgtccaattttatatagcctatattaatac
#   ...
# Amino Acid Composition

#!/bin/bash -norc

# Three-letter abbreviations indexed by letter offset: a=0 (Ala) ... z=25 (Glx).
abbrev=( Ala Asx Cys Asp Glu Phe Gly His Ile \
         Xle Lys Leu Met Asn Pyl Pro Gln Arg \
         Ser Thr Sec Val Trp Xxx Tyr Glx )

# AminoAcidComp: convert "count letter" rows into a sorted 26-row table.
#
# Arguments: $1 - newline-separated rows, each "<count> <lowercase letter>"
# Globals:   abbrev (read)
# Stdout:    "<abbreviation><TAB><count>" for all 26 letters, sorted; letters
#            that do not occur in $1 are reported with a count of 0.
AminoAcidComp() {
  local -a count=()
  local num lttr idx ofs i
  while read -r num lttr
  do
    # An empty composition arrives as one blank row; skip it rather than
    # computing a subscript from a missing letter.
    [ -n "$lttr" ] || continue
    # A leading quote makes printf print the character code of the letter.
    idx=$(printf '%d' "'$lttr")
    ofs=$((idx-97))
    # Only offsets 0..25 (a..z) have a slot in abbrev.
    if [ "$ofs" -lt 0 ] || [ "$ofs" -gt 25 ]
    then
      continue
    fi
    count[$ofs]="$num"
  done <<< "$1"
  for i in {0..25}
  do
    printf '%s\t%s\n' "${abbrev[$i]}" "${count[$i]-0}"
  done | sort
}

# AminoAcidJoin: build one composition table with a column per input record.
#
# Stdin:  rows of "<accession> <sequence> <gene>" (whitespace separated).
# Stdout: a "GENE" header row naming each gene, then one row per residue
#         abbreviation with a count per gene; only the header and rows that
#         contain a non-zero digit are printed.
# Uses the external sort-uniq-count command to tally the letters.
AminoAcidJoin() {
  local result="" acc seq gene comp current
  while read -r acc seq gene
  do
    # Lower-case the sequence, drop non-letters, then count each letter.
    comp=$(printf '%s\n' "$seq" | tr A-Z a-z | sed 's/[^a-z]//g' |
           fold -w 1 | sort-uniq-count)
    current=$(AminoAcidComp "$comp")
    current=$(printf 'GENE\t%s\n%s' "$gene" "$current")
    if [ -n "$result" ]
    then
      # Every table has the same keys in the same order, so join pairs
      # the rows one-to-one and appends the new column.
      result=$(join -t $'\t' <(printf '%s\n' "$result") <(printf '%s\n' "$current"))
    else
      result=$current
    fi
  done
  printf '%s\n' "$result" | grep -e "GENE" -e "[1-9]"
}

ids="NP_001172026,NP_000509,NP_004001,NP_001243779"
efetch -db protein -id "$ids" -format gpc |
xtract -insd INSDSeq_sequence CDS gene |
AminoAcidJoin

# Output shown in the original:
#   GENE INS HBB DMD TTN
#   Ala 10 15 210 2084
#   Arg 5 3 193 1640
#   Asn 3 6 153 1111
#   Asp 2 7 185 1720
#   Cys 6 2 35 513
#   Gln 7 3 301 942
#   Glu 8 8 379 3193
#   Gly 12 13 104 2066
#   His 2 9 84 478
#   Ile 2 0 165 2062
#   Leu 20 18 438 2117
#   Lys 2 11 282 2943
#   Met 2 2 79 398
#   Phe 3 8 77 908
#   Pro 6 7 130 2517
#   Ser 5 5 239 2463
#   Thr 3 7 194 2546
#   Trp 2 2 67 466
#   Tyr 4 3 61 999
#   Val 6 18 186 3184

# GENE

# Chromosome Assignments

esearch -db gene -query "calmodulin [PFN] AND mammalia [ORGN]" |
efetch -format docsum |
xtract -pattern DocumentSummary \
  -def "-" -element Id Name MapLocation ScientificName

# Output shown in the original:
#   801 CALM1 14q32.11 Homo sapiens
#   808 CALM3 19q13.32 Homo sapiens
#   805 CALM2 2p21 Homo sapiens
#   24242 Calm1 6q32 Rattus norvegicus
#   12313 Calm1 12 E Mus musculus
#   326597 CALM - Bos taurus
#   50663 Calm2 6q12 Rattus norvegicus
#   24244 Calm3 1q21 Rattus norvegicus
#   12315 Calm3 7 9.15 cM Mus musculus
#   12314 Calm2 17 E4 Mus musculus
#   617095 CALM1 - Bos taurus
#   396838 CALM3 6 Sus scrofa
#   ...
# Genome Range

# Rows are ordered by the smaller of ChrStart/ChrStop, that sort key is then
# cut away, and between-two-genes selects the span from ASMT to IL3RA.
esearch -db gene -query "Homo sapiens [ORGN] AND Y [CHR]" |
efilter -status alive | efetch -format docsum |
xtract -pattern DocumentSummary -NAME Name -DESC Description \
  -block GenomicInfoType -if ChrLoc -equals Y \
    -min ChrStart,ChrStop -element "&NAME" "&DESC" |
sort -k 1,1n | cut -f 2- |
grep -v pseudogene | grep -v uncharacterized |
between-two-genes ASMT IL3RA

# Output shown in the original:
#   IL3RA interleukin 3 receptor subunit alpha
#   SLC25A6 solute carrier family 25 member 6
#   LINC00106 long intergenic non-protein coding RNA 106
#   ASMTL-AS1 ASMTL antisense RNA 1
#   ASMTL acetylserotonin O-methyltransferase-like
#   P2RY8 purinergic receptor P2Y8
#   AKAP17A A-kinase anchoring protein 17A
#   ASMT acetylserotonin O-methyltransferase

# Centromere Position

# grep has no "\t" escape, so the TAB after the chromosome name is supplied
# with ANSI-C quoting ($'...'); "^X\t" in plain double quotes would not match
# an X followed by a TAB.
nquire -ftp ftp.ncbi.nlm.nih.gov pub/gdp ideogram_9606_GCF_000001305.14_850_V1 |
grep acen | cut -f 1,2,6,7 | grep $'^X\t'

# Output shown in the original:
#   X p 58100001 61000000
#   X q 61000001 63800000

# Gene Regions

# xargs hands each accession/start/stop triple to sh -c as "$0" "$1" "$2".
esearch -db gene -query "DDT [GENE] AND mouse [ORGN]" |
efetch -format docsum |
xtract -pattern GenomicInfoType -element ChrAccVer ChrStart ChrStop |
xargs -n 3 sh -c 'efetch -db nuccore -format gb \
  -id "$0" -chr_start "$1" -chr_stop "$2"'

# Output shown in the original:
#   LOCUS NC_000076 2142 bp DNA linear CON 09-FEB-2015
#   DEFINITION Mus musculus strain C57BL/6J chromosome 10, GRCm38.p3 C57BL/6J.
#   ACCESSION NC_000076 REGION: complement(75771233..75773374) GPC_000000783
#   VERSION NC_000076.6
#   ...
#   FEATURES Location/Qualifiers
#     source 1..2142
#       /organism="Mus musculus"
#       /mol_type="genomic DNA"
#       /strain="C57BL/6J"
#       /db_xref="taxon:10090"
#       /chromosome="10"
#     gene 1..2142
#       /gene="Ddt"
#     mRNA join(1..159,462..637,1869..2142)
#       /gene="Ddt"
#       /product="D-dopachrome tautomerase"
#       /transcript_id="NM_010027.1"
#     CDS join(52..159,462..637,1869..1941)
#       /gene="Ddt"
#       /codon_start=1
#       /product="D-dopachrome decarboxylase"
#       /protein_id="NP_034157.1"
#       /translation="MPFVELETNLPASRIPAGLENRLCAATATILDKPEDRVSVTIRP
#       GMTLLMNKSTEPCAHLLVSSIGVVGTAEQNRTHSASFFKFLTEELSLDQDRIVIRFFP
#       ...
Recursive Data esearch -db gene -query "rbcL [GENE] AND maize [ORGN]" | efetch -format xml | xtract -pattern Entrezgene -block "**/Gene-commentary" \ -if Gene-commentary_type@value -equals genomic \ -tab "\n" -element Gene-commentary_accession | sort | uniq NC_001666 X86563 Z11973 Genes in Pathways esearch -db gene -query "PAH [GENE]" -organism human | elink -target biosystems | efilter -pathway wikipathways | elink -target gene | efetch -format docsum | xtract -pattern DocumentSummary -element Name Id Description | grep -v pseudogene | grep -v uncharacterized | sort -f AANAT 15 aralkylamine N-acetyltransferase ACADM 34 acyl-CoA dehydrogenase medium chain ACHE 43 acetylcholinesterase (Cartwright blood group) ADCYAP1 116 adenylate cyclase activating polypeptide 1 ... Gene Products for sym in HBB DMD TTN ATP7B HFE BRCA2 CFTR PAH PRNP RAG1 do esearch -db gene -query "$sym [GENE] AND human [ORGN]" | efilter -query "alive [PROP]" | efetch -format docsum | xtract -pattern GenomicInfoType \ -element ChrAccVer ChrStart ChrStop | while read acc str stp do efetch -db nuccore -format gbc \ -id "$acc" -chr_start "$str" -chr_stop "$stp" | xtract -insd CDS,mRNA INSDFeature_key "#INSDInterval" \ gene "%transcription" "%translation" \ product transcription translation | grep -i $'\t'"$sym"$'\t' done done NC_000011.10 mRNA 3 HBB 626 hemoglobin, beta ACATTTGCTT... NC_000011.10 CDS 3 HBB 147 hemoglobin subunit beta MVHLTPEEKS... NC_000023.11 mRNA 78 DMD 13805 dystrophin, transcript variant X2 AGGAAGATGA... NC_000023.11 mRNA 77 DMD 13794 dystrophin, transcript variant X6 ACTTTCCCCC... NC_000023.11 mRNA 77 DMD 13800 dystrophin, transcript variant X5 ACTTTCCCCC... NC_000023.11 mRNA 77 DMD 13785 dystrophin, transcript variant X7 ACTTTCCCCC... NC_000023.11 mRNA 74 DMD 13593 dystrophin, transcript variant X8 ACTTTCCCCC... NC_000023.11 mRNA 75 DMD 13625 dystrophin, transcript variant X9 ACTTTCCCCC... ... 
Unfiltered Gene Lookup for sym in ATP6 CBD HBB OPN1MW do esearch -db gene -query "$sym [GENE]" -organism human | efetch -format docsum | xtract -pattern DocumentSummary -def "-" -lbl "${sym}" \ -element NomenclatureSymbol Id Description CommonName done ATP6 MT-ATP6 4508 ATP synthase F0 subunit 6 human ATP6 - 6775074 ATP synthase F0 subunit 6 Neandertal ATP6 - 8923188 ATP synthase F0 subunit 6 Denisova hominin CBD OPN1MW 2652 opsin 1, medium wave sensitive human HBB HBB 3043 hemoglobin subunit beta human HBB KRT89P 85344 keratin 89 pseudogene human OPN1MW OPN1MW 2652 opsin 1, medium wave sensitive human OPN1MW OPN1MW3 101060233 opsin 1, medium wave sensitive 3 human Protein Coding Genes for sym in MT-ATP6 BRCA2 CFTR HBB HFE IL9R OPN1MW PAH do esearch -db gene -query "$sym [GENE]" -organism human | efilter -status alive -type coding | efetch -format docsum | xtract -pattern DocumentSummary \ -if NomenclatureSymbol -equals "${sym}" \ -lbl "${sym}" -element Id Chromosome Description done | ... MT-ATP6 4508 MT ATP synthase F0 subunit 6 BRCA2 675 13 BRCA2 DNA repair associated CFTR 1080 7 CF transmembrane conductance regulator HBB 3043 11 hemoglobin subunit beta HFE 3077 6 homeostatic iron regulator IL9R 3581 X, Y interleukin 9 receptor OPN1MW 2652 X opsin 1, medium wave sensitive PAH 5053 12 phenylalanine hydroxylase Common Pathways ... while IFS=$'\t' read sym uid chr desc do elink -db gene -id "$uid" -target biosystems | efilter -kind pathway | efetch -format docsum | xtract -pattern DocumentSummary -lbl "${sym}" \ -lower source -element externalid biosystemname done | sort -t $'\t' -k 2,2 -k 3,3 -k 1,1 | awk 'a[$3]++{ if(a[$3]==2){ print b }; print $0}; {b=$0}' MT-ATP6 kegg hsa01100 Metabolic pathways PAH kegg hsa01100 Metabolic pathways HBB reactome R-HSA-1430728 Metabolism MT-ATP6 reactome R-HSA-1430728 Metabolism PAH reactome R-HSA-1430728 Metabolism CFTR reactome R-HSA-162582 Signal Transduction OPN1MW reactome R-HSA-162582 Signal Transduction ... 
TAXONOMY Taxonomic Names esearch -db taxonomy -query "txid10090 [SBTR] OR camel [COMN]" | efetch -format docsum | xtract -pattern DocumentSummary -if CommonName \ -element Id ScientificName CommonName 57486 Mus musculus molossinus Japanese wild mouse 39442 Mus musculus musculus eastern European house mouse 35531 Mus musculus bactrianus southwestern Asian house mouse 10092 Mus musculus domesticus western European house mouse 10091 Mus musculus castaneus southeastern Asian house mouse 10090 Mus musculus house mouse 9838 Camelus dromedarius Arabian camel 9837 Camelus bactrianus Bactrian camel STRUCTURE Structural Similarity esearch -db structure -query "crotalus [ORGN] AND phospholipase A2" | elink -related | efilter -query "archaea [ORGN]" | efetch -format docsum | xtract -pattern DocumentSummary \ -if PdbClass -equals Hydrolase \ -element PdbAcc PdbDescr 3WIV Crystal Structure Of Pro-s324a/d356a 3WIU Crystal Structure Of Pro-s324a/l349a 3VV2 Crystal Structure Of Complex Form Between S324a-subtilisin And Mutant Tkpro 3VHQ Crystal Structure Of The Ca6 Site Mutant Of Pro-Sa-Subtilisin 2ZWP Crystal Structure Of Ca3 Site Mutant Of Pro-S324a ... 
SNP Amino Acid Substitutions esearch -db gene -query "OPN1MW [GENE] AND human [ORGN]" | elink -target snp | efetch -format json | xtract -j2x -set - -rec RS | xtract -pattern RS -pfx "rs" -RSID RS/refsnp_id \ -group protein -if name -contains missense \ -block variant -element "&RSID" seq_id \ inserted_sequence -tab "\n" -inc position | sort -t $'\t' -k 2,2 -k 4,4n -k 3,3f -k 1.3n | uniq | while read rsid accn res pos do if [ "$accn" != "$last" ] then seq=$(efetch -db protein -id "$accn" -format gpc < /dev/null | xtract -pattern INSDSeq -element INSDSeq_sequence) last=$accn fi echo ">$rsid [$accn $res@$pos]" echo "${seq:0:$pos-1}$res${seq:$pos}" | fold -w 50 done >rs1238141906 [NP_000504.1 K@41] maqqwslqrlagrhpqdsyedstqssiftytnsnstrgpfKgpnyhiapr wvyhltsvwmifvviasvftnglvlaatmkfkklrhplnwilvnlavadl aetviastisvvnqvygyfvlghpmcvlegytvslcgitglwslaiiswe ... Sequences Flanking SNPs #!/bin/bash -norc efetch -db snp -id 268 -format json | xtract -j2x -set - -rec RS | xtract -pattern RS -pfx "rs" -RSID RS/refsnp_id \ -group placements_with_allele \ -block allele -if seq_id -starts-with "NC_" \ -and inserted_sequence -differs-from deleted_sequence \ -element "&RSID" seq_id deleted_sequence \ inserted_sequence -tab "\n" -inc position | sort -t $'\t' -k 2,2 -k 5,5n -k 4,4f -k 1.3n | uniq | while read rsid accn del ins pos do lft=$(efetch -db nuccore -format fasta -id "$accn" \ -seq_start "$((pos-50))" -seq_stop "$((pos-1))" < /dev/null | grep -v '>' | tr -d '\n') ad=${#ins} sb=${#del} rgt=$(efetch -db nuccore -format fasta -id "$accn" \ -seq_start "$((pos+ad-sb+1))" -seq_stop "$((pos+ad-sb+50))" < /dev/null | grep -v '>' | tr -d '\n') echo "$rsid $accn $pos $del->$ins" echo "5': $lft" echo "3': $rgt" echo "" done rs268 NC_000008.10 19813529 A->G 5': CTGCTTGAGTTGTAGAAAGAACCGCTGCAACAATCTGGGCTATGAGATCA 3': TAAAGTCAGAGCCAAAAGAAGCAGCAAAATGTACCTGAAGACTCGTTCTC rs268 NC_000008.11 19956018 A->G 5': CTGCTTGAGTTGTAGAAAGAACCGCTGCAACAATCTGGGCTATGAGATCA 3': 
TAAAGTCAGAGCCAAAAGAAGCAGCAAAATGTACCTGAAGACTCGTTCTC EXTERNAL JSON Nested Array Expansion for ns in flat recurse plural depth do echo " $ns" echo nquire -get "http://mygene.info/v3" gene 2652 | xtract -j2x -set - -rec GeneRec -nest "$ns" | grep position | head -n 4 echo done "position": [ [ 154182595, 154182789 ], [ 154187769, 154188066 ], flat 154182595 154182789 154187769 154188066 recurse 154182595 154182789 plural 154182595 154182789 depth 154182595 154182789 Exon Interval Sets nquire -get "http://mygene.info/v3/gene/2652" | xtract -j2x -set - -rec GeneRec -nest plural | xtract -pattern GeneRec -group exons -lbl "" -clr \ -block positions -pfc "\n" -sep ".." -tab "\n" -element position 154182595..154182789 154187769..154188066 154190053..154190222 154191687..154191853 154193407..154193647 154195929..154196861 154219733..154219927 154224907..154225204 ... Heterogeneous Object Names nquire -get "http://mygene.info/v3/gene/2652" | xtract -j2x -set - -rec GeneRec | xtract -pattern GeneRec -group "pathway/*" -pfx "\n" -element "?,name,id" R-HSA-162582 Signal Transduction ... 
WP455 GPCRs, Class A Rhodopsin-like reactome Signal Transduction R-HSA-162582 reactome Disease R-HSA-1643685 reactome The retinoid cycle in cones (daylight vision) R-HSA-2187335 reactome Visual phototransduction R-HSA-2187338 reactome Retinoid cycle disease events R-HSA-2453864 reactome Diseases associated with visual transduction R-HSA-2474795 reactome Signaling by GPCR R-HSA-372790 reactome Class A/1 (Rhodopsin-like receptors) R-HSA-373076 reactome GPCR downstream signalling R-HSA-388396 reactome G alpha (i) signalling events R-HSA-418594 reactome Opsins R-HSA-419771 reactome GPCR ligand binding R-HSA-500792 reactome Diseases of signal transduction R-HSA-5663202 wikipathways GPCRs, Class A Rhodopsin-like WP455 XML Namespace Prefixes nquire -url "http://webservice.wikipathways.org" getPathway -pwId WP455 | xtract -pattern "ns1:getPathwayResponse" -element ":gpml" | transmute -decode64 | xtract -pattern Pathway -block Xref \ -if @Database -equals "Entrez Gene" \ -tab "\n" -element @ID | sort -n 134 135 136 140 146 ... LOCAL ARCHIVE Entrez Indexing efetch -db pubmed -id 12857958,2981625 -format xml | xtract -e2index | xtract -pattern IdxDocument -UID IdxUid \ -block NORM -pfc "\n" -element "&UID",NORM,"@pos" 12857958 allow 205 12857958 assays 147 12857958 binding 146 12857958 braid 187,215 12857958 braiding 153 ... Author Frequency esearch -db pubmed -query "rattlesnake phospholipase" | efetch -format uid | fetch-pubmed | xtract -pattern PubmedArticle -block Author \ -sep " " -tab "\n" -element LastName,Initials | sort-uniq-count-rank 40 Marangoni S 33 Toyama MH 28 Soares AM 25 Bon C ... 
Author Counts esearch -db pubmed -query "conotoxin" | efetch -format uid | fetch-pubmed | xtract -pattern PubmedArticle -num Author | sort-uniq-count -n | reorder-columns 2 1 | head -n 15 | tee /dev/tty | xy-plot auth.png 0 11 1 193 2 854 3 844 4 699 5 588 6 439 7 291 8 187 9 124 10 122 11 58 12 33 13 18 900 + | ******** 800 + * ** | * * 700 + * *** | * ** 600 + * * | * *** 500 + * ** | * *** 400 + * ** | * * 300 + * *** | * * 200 + * ****** | * ********* 100 + ** * | * ********** 0 + * ****** +---------+---------+---------+---------+---------+---------+---------+ 0 2 4 6 8 10 12 14 Title and Abstract Word Counts esearch -db pubmed -query "conotoxin" -pub structured | efetch -format uid | fetch-pubmed | xtract -stops -wrp "Set,Rec" \ -pattern PubmedArticle -wrp "PMID" -element MedlineCitation/PMID \ -wrp "Titl" -words ArticleTitle \ -block Abstract/AbstractText -wrp "Grp,Abst" -words AbstractText | xtract -pattern Rec -element PMID -num Titl -block Grp -tab ", " -num Abst 29194563 21 63, 84, 89, 26 28882644 23 87, 34, 115, 25 28877214 10 12, 42, 315, 94 28825343 15 169 28482835 9 75, 123, 42, 37 28479398 15 170, 130 ... Verbosity Per Year esearch -db pubmed -query "PNAS [JOUR]" -pub abstract | efetch -format uid | stream-pubmed | gunzip -c | xtract -stops -wrp Set,Rec -pattern PubmedArticle \ -wrp "Year" -year "PubDate/*" \ -wrp "Abst" -words Abstract/AbstractText | xtract -wrp Set,Pub -pattern Rec \ -wrp "Year" -element Year \ -wrp "Num" -num Abst > countsByYear.xml for yr in {1960..2020} do cat countsByYear.xml | xtract -wrp Raw -pattern Pub -select Year -eq "$yr" | xtract -pattern Raw -lbl "$yr" -avg Num done | tee /dev/tty | xy-plot verbosity.png rm countsByYear.xml Appending Metadata esearch -db pubmed -query "PNAS [JOUR]" -pub abstract | efetch -format uid | fetch-pubmed > pnas.xml cat pnas.xml | xtract -stops -wrp Set,Rec -pattern PubmedArticle \ -wrp ID -element MedlineCitation/PMID \ -wrp Abst -words Abstract/AbstractText | 31822623 foxp3cd4regulatory... 
# ...

# (this command receives the piped output of the xtract command that precedes
# it in the file)
xtract -pattern Rec -element ID -wrp Num -num Abst > counts.txt

# Output shown in the original:
#   31822623 243
#   31822622 132
#   31822621 252
#   31822620 238
#   ...

xtract -input pnas.xml -wrp PubmedArticleSet -pattern PubmedArticle \
  -select MedlineCitation/PMID -appending counts.txt > merged.xml

# LOCAL INDEX

# Histogram Shortcut

cat "$EDIRECT_PUBMED_MASTER"/Current/*.xml |
xtract -timer -pattern PubmedArticle -histogram PubDate/Month

# Output shown in the original:
#   26 8
#   37 9
#   121475 01
#   114579 02
#   111137 03
#   109794 04
#   120169 05
#   130062 06
#   125107 07
#   126246 08
#   123191 09
#   120957 10
#   109657 11
#   110854 12
#   1958892 Apr
#   1809730 Aug
#   2086169 Dec
#   1844717 Feb
#   1851803 Jan
#   1784258 Jul
#   2015942 Jun
#   1943325 Mar
#   1815691 May
#   1889194 Nov
#   2035632 Oct
#   1 October
#   1956569 Sep

# Month Format Per Year

# Stage 1 wraps the publication year (YR) and the length of the Month text
# (MN) for every article that has a PubDate/Month.
# Stage 2 combines them into a single <DT> element of the form "YR+-MN-",
# which is the element the -histogram stage counts.
# The tr/sed stages then split the key on "+" and rewrite "-LEN-" as a
# category number: 3 -> 1, 2 -> 2, 1 -> 3, any other single digit -> 4.
cat "$EDIRECT_PUBMED_MASTER"/Current/*.xml |
xtract -wrp Set,Rec -pattern PubmedArticle \
  -if PubDate/Month -wrp YR -year "PubDate/*" -wrp MN -len PubDate/Month |
xtract -wrp Set,Rec -pattern Rec \
  -pfx "<DT>" -sep "+-" -sfx "-</DT>" -element YR,MN |
xtract -pattern Rec -histogram DT |
reorder-columns 2 1 | tr '+' '\t' |
sed -e 's/-3-/1/g' -e 's/-2-/2/g' -e 's/-1-/3/g' -e 's/-[0-9]-/4/g' |
sort -k 1,1n -k 2,2n > rawMonthCounts.txt

# rawMonthCounts.txt rows are "year<TAB>category<TAB>count".
# Start from the list of years, then append one count column per category.
result=$( cut -f 1 rawMonthCounts.txt | uniq )
for i in {1..4}
do
  # Exact match on the category field; grep "\t$i\t" would not match a TAB
  # because grep does not interpret "\t".
  current=$( awk -F '\t' -v n="$i" '$2 == n { print $1 "\t" $3 }' rawMonthCounts.txt )
  result=$(join -a 1 -t $'\t' <(printf '%s\n' "$result") <(printf '%s\n' "$current"))
done
printf '%s\n' "$result" > plotme.txt
xy-plot < plotme.txt

# Phrase Query Automation

# ascend_mesh_tree: run "phrase-search -count" for a MeSH tree number and
# then for each of its ancestors, from the given node up to the top level.
#
# Arguments: $1 - dotted tree number, e.g. C01.925.782.417.415; one trailing
#                 "*" is accepted and removed before use.
# Stdout:    whatever phrase-search prints for each "<node>* [TREE]" query.
ascend_mesh_tree() {
  local node="${1%\*}"
  while :
  do
    phrase-search -count "$node* [TREE]"
    case "$node" in
      # Drop the last dotted segment whatever its length; removing a fixed
      # four characters never shortens a dotted value under four characters,
      # which would keep this loop running forever.
      *.* ) node="${node%.*}" ;;
      *   ) break ;;
    esac
  done
}

ascend_mesh_tree "C01.925.782.417.415"

# Output shown in the original:
#   5148 c01 925 782 417 415*
#   26792 c01 925 782 417*
#   607883 c01 925 782*
#   870516 c01 925*
#   2541697 c01*

# Medical Subject Heading Code Viewers
#   https://meshb.nlm.nih.gov/treeView
#   https://meshb-prev.nlm.nih.gov/treeView

# MISCELLANEOUS

# Indexed Fields

einfo -db pubmed |
xtract -pattern Field \
  -if IsDate -equals Y -and IsHidden -equals N \
    -pfx "[" -sep "]\t" -element Name,FullName |
sort -t $'\t' -k 2f

# Output shown in the original:
#   [CDAT] Date - Completion
#   [CRDT] Date - Create
#   [EDAT] Date - Entrez
#   [MHDA] Date - MeSH
#   [MDAT] Date - Modification
#   [PDAT] Date - Publication

# Pseudocode Prototype
#
#   for each PubmedArticle {
#     for each Author {
#       print Initials LastName
#     }
#     for each MeshHeading {
#       print DescriptorName
#       for each QualifierName {
#         print QualifierName
#       }
#     }
#   }

xtract -pattern PubmedArticle \
  -block Author -element Initials LastName \
  -block MeshHeading -element DescriptorName \
    -subset QualifierName -element QualifierName

# Processing in Groups

# ...
efetch -format acc | join-into-groups-of 200 |
xargs -n 1 sh -c 'epost -db nuccore -format acc -id "$0" | efetch -format gb'